浏览代码

[brightcove] Parse URL from meta element if available (Fixes #2253)

Philipp Hagemeister 11 年之前
父节点
当前提交
117bec936c
共有 1 个文件被更改,包括 19 次插入和 3 次删除
  1. 19 3
      youtube_dl/extractor/brightcove.py

+ 19 - 3
youtube_dl/extractor/brightcove.py

@@ -23,7 +23,6 @@ from ..utils import (
 class BrightcoveIE(InfoExtractor):
 class BrightcoveIE(InfoExtractor):
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
-    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
 
 
     _TESTS = [
     _TESTS = [
         {
         {
@@ -71,6 +70,17 @@ class BrightcoveIE(InfoExtractor):
                 'uploader': 'National Ballet of Canada',
                 'uploader': 'National Ballet of Canada',
             },
             },
         },
         },
+        {
+            # https://github.com/rg3/youtube-dl/issues/2253
+            'url': 'http://v.thestar.com/services/player/bcpid2071349530001?bckey=AQ~~,AAAAuO4KaJE~,gatFNwSKdGDmDpIYqNJ-fTHn_c4z_LH_&bctid=3101154703001',
+            'file': '3101154703001.mp4',
+            'md5': '0ba9446db037002366bab3b3eb30c88c',
+            'info_dict': {
+                'title': 'Still no power',
+                'uploader': 'thestar.com',
+                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+            }
+        }
     ]
     ]
 
 
     @classmethod
     @classmethod
@@ -131,6 +141,11 @@ class BrightcoveIE(InfoExtractor):
         """Try to extract the brightcove url from the wepbage, returns None
         """Try to extract the brightcove url from the wepbage, returns None
         if it can't be found
         if it can't be found
         """
         """
+
+        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
+        if url_m:
+            return url_m.group(1)
+
         m_brightcove = re.search(
         m_brightcove = re.search(
             r'''(?sx)<object
             r'''(?sx)<object
             (?:
             (?:
@@ -183,8 +198,9 @@ class BrightcoveIE(InfoExtractor):
         return self._extract_video_info(video_info)
         return self._extract_video_info(video_info)
 
 
     def _get_playlist_info(self, player_key):
     def _get_playlist_info(self, player_key):
-        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
-                                               player_key, 'Downloading playlist information')
+        info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
+        playlist_info = self._download_webpage(
+            info_url, player_key, 'Downloading playlist information')
 
 
         json_data = json.loads(playlist_info)
         json_data = json.loads(playlist_info)
         if 'videoList' not in json_data:
         if 'videoList' not in json_data: