Charles Chen пре 11 година
родитељ
комит
b1b01841af
2 измењених фајлова са 68 додато и 0 уклоњено
  1. 1 0
      youtube_dl/extractor/__init__.py
  2. 67 0
      youtube_dl/extractor/mlb.py

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -169,6 +169,7 @@ from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mixcloud import MixcloudIE
 from .mixcloud import MixcloudIE
+from .mlb import MlbIE
 from .mpora import MporaIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
 from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .mooshare import MooshareIE

+ 67 - 0
youtube_dl/extractor/mlb.py

@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class MlbIE(InfoExtractor):
+    """Extractor for m.mlb.com video pages (URLs matching ``_VALID_URL``)."""
+    _VALID_URL = r'https?://m\.mlb\.com/video/topic/[0-9]+/v(?P<id>n?\d+)/.*$'
+    _TEST = {
+        'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
+        'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
+        'info_dict': {
+            'id': '34496663',
+            'ext': 'mp4',
+            'format': 'mp4',
+            'description': "7/11/14: Giancarlo Stanton practices for the Home Run Derby prior to the game against the Mets",
+            'title': "Stanton prepares for Derby",
+        },
+    }
+    # Shape of the thumbnail URL; the part before "/images/" is the media folder.
+    _IMAGE_URL = r'http://mediadownloads\.mlb\.com/mlbam/(?P<date>.+)/images/.*$'
+    # Bitrate variants to look for in the folder listing, best quality first.
+    _BITRATES = ('1800K', '1500K', '1200K', '600K', '300K')
+
+    def _real_extract(self, url):
+        """Build the info dict for ``url``; returns None when no video can be located."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage, default=video_id)
+        description = self._html_search_regex(
+            r'<meta name="description" (?:content|value)="(.*?)"/>',
+            webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'<meta itemprop="image" (?:content|value)="(.*?)" />',
+            webpage, 'image', fatal=False)
+
+        # The thumbnail URL is the only pointer to the folder holding the videos.
+        folder = re.match(self._IMAGE_URL, thumbnail) if thumbnail else None
+        if folder is None:
+            return None
+        base_url = 'http://mediadownloads.mlb.com/mlbam/' + folder.group('date')
+        filespage = self._download_webpage(base_url, video_id)
+
+        for bitrate in self._BITRATES:
+            suffix = '_%s_%s.mp4' % (video_id, bitrate)
+            prefix = self._html_search_regex(
+                r'<li><a href="(.*?)' + re.escape(suffix) + '"',
+                filespage, bitrate, fatal=False)
+            if prefix is not None:
+                break
+        else:
+            # nothing valuable to return
+            return None
+
+        return {
+            'id': video_id,
+            'url': base_url + '/' + prefix + suffix,
+            'title': title,
+            'ext': 'mp4',
+            'format': 'mp4',
+            'description': description,
+            'thumbnail': thumbnail,
+        }