Browse Source

[extractor/common] Support HTML media elements without child nodes

Yen Chi Hsuan 9 years ago
parent
commit
cea364f70c
2 changed files with 6 additions and 1 deletion
  1. ChangeLog (+1 -0)
  2. youtube_dl/extractor/common.py (+5 -1)

+ 1 - 0
ChangeLog

@@ -1,6 +1,7 @@
 version <unreleased>
 
 Core
++ Support HTML media elements without child nodes
 * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387)
 
 Extractors

+ 5 - 1
youtube_dl/extractor/common.py

@@ -1802,7 +1802,11 @@ class InfoExtractor(object):
             return is_plain_url, formats
 
         entries = []
-        for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+        media_tags = [(media_tag, media_type, '')
+                      for media_tag, media_type
+                      in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+        media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+        for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
                 'subtitles': {},