浏览代码

Merge pull request #7599 from lalinsky/fix-youtube

[youtube] More explicit player config JSON extraction (fixes #7468)
Sergey M 9 年之前
父节点
当前提交
5ae17037a3
共有 1 个文件被更改,包括 30 次插入 和 8 次删除
  1. 30 8
      youtube_dl/extractor/youtube.py

+ 30 - 8
youtube_dl/extractor/youtube.py

@@ -691,7 +691,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         {
         {
             'url': 'http://vid.plus/FlRa-iH7PGw',
             'url': 'http://vid.plus/FlRa-iH7PGw',
             'only_matching': True,
             'only_matching': True,
-        }
+        },
+        {
+            # Title with JS-like syntax "};"
+            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
+            'info_dict': {
+                'id': 'lsguqyKfVQg',
+                'ext': 'mp4',
+                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+                'upload_date': '20151119',
+                'uploader_id': 'IronSoulElf',
+                'uploader': 'IronSoulElf',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
     ]
 
 
     def __init__(self, *args, **kwargs):
     def __init__(self, *args, **kwargs):
@@ -875,16 +891,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return {}
             return {}
         return sub_lang_list
         return sub_lang_list
 
 
+    def _get_ytplayer_config(self, webpage):
+        patterns = [
+            r';ytplayer\.config\s*=\s*({.*?});ytplayer',
+            r';ytplayer\.config\s*=\s*({.*?});',
+        ]
+        config = self._search_regex(patterns, webpage, 'ytconfig.player', default=None)
+        if config is not None:
+            return json.loads(uppercase_escape(config))
+
     def _get_automatic_captions(self, video_id, webpage):
     def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
            argument to speed up the process."""
         self.to_screen('%s: Looking for automatic captions' % video_id)
         self.to_screen('%s: Looking for automatic captions' % video_id)
-        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
+        player_config = self._get_ytplayer_config(webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
-        if mobj is None:
+        if player_config is None:
             self._downloader.report_warning(err_msg)
             self._downloader.report_warning(err_msg)
             return {}
             return {}
-        player_config = json.loads(mobj.group(1))
         try:
         try:
             args = player_config['args']
             args = player_config['args']
             caption_url = args['ttsurl']
             caption_url = args['ttsurl']
@@ -1091,10 +1115,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             age_gate = False
             age_gate = False
             video_info = None
             video_info = None
             # Try looking directly into the video webpage
             # Try looking directly into the video webpage
-            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
-            if mobj:
-                json_code = uppercase_escape(mobj.group(1))
-                ytplayer_config = json.loads(json_code)
+            ytplayer_config = self._get_ytplayer_config(video_webpage)
+            if ytplayer_config is not None:
                 args = ytplayer_config['args']
                 args = ytplayer_config['args']
                 if args.get('url_encoded_fmt_stream_map'):
                 if args.get('url_encoded_fmt_stream_map'):
                     # Convert to the same format returned by compat_parse_qs
                     # Convert to the same format returned by compat_parse_qs