12 years ago · db1f388878
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -1,3 +1,5 @@
 
															+from __future__ import unicode_literals
														
 
															+
														
 
															 from .common import FileDownloader
														
 
															 from .hls import HlsFD
														
 
															 from .http import HttpFD
														
@@ -12,10 +14,11 @@ from ..utils import (
 
															 def get_suitable_downloader(info_dict):
														
 
															     """Get the downloader class that can handle the info dict."""
														
 
															     url = info_dict['url']
														
 
															+    protocol = info_dict.get('protocol')
														
 
															     if url.startswith('rtmp'):
														
 
															         return RtmpFD
														
 
															-    if determine_ext(url) == u'm3u8':
														
 
															+    if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
														
 
															         return HlsFD
														
 
															     if url.startswith('mms') or url.startswith('rtsp'):
														
 
															         return MplayerFD
														
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -83,6 +83,7 @@ from .googlesearch import GoogleSearchIE
 
															 from .hark import HarkIE
														
 
															 from .hotnewhiphop import HotNewHipHopIE
														
 
															 from .howcast import HowcastIE
														
 
															+from .huffpost import HuffPostIE
														
 
															 from .hypem import HypemIE
														
 
															 from .ign import IGNIE, OneUPIE
														
 
															 from .imdb import (
														
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -71,7 +71,7 @@ class InfoExtractor(object):
 
															                     * player_url SWF Player URL (used for rtmpdump).
														
 
															                     * protocol   The protocol that will be used for the actual
														
 
															                                  download, lower-case.
														
 
															-                                 "http", "https", "rtsp", "rtmp" or so.
														
 
															+                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
														
 
															                     * preference Order number of this format. If this field is
														
 
															                                  present and not None, the formats get sorted
														
 
															                                  by this field.
														
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -332,10 +332,16 @@ class GenericIE(InfoExtractor):
 
															         # Look for embedded Facebook player
														
 
															         mobj = re.search(
														
 
															-            r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
														
 
															+            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
														
 
															         if mobj is not None:
														
 
															             return self.url_result(mobj.group('url'), 'Facebook')
														
 
															+        # Look for embedded Huffington Post player
														
 
															+        mobj = re.search(
														
 
															+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
														
 
															+        if mobj is not None:
														
 
															+            return self.url_result(mobj.group('url'), 'HuffPost')
														
 
															+
														
 
															         # Start with something easy: JW Player in SWFObject
														
 
															         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
														
 
															         if mobj is None:
														
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@@ -0,0 +1,70 @@
 
															+from __future__ import unicode_literals
														
 
															+
														
 
															+import re
														
 
															+
														
 
															+from .common import InfoExtractor
														
 
															+from ..utils import (
														
 
															+    parse_duration,
														
 
															+    unified_strdate,
														
 
															+)
														
 
															+
														
 
															+
														
 
															+class HuffPostIE(InfoExtractor):
														
 
															+    IE_DESC = 'Huffington Post'
														
 
															+    _VALID_URL = r'''(?x)
														
 
															+        https?://(embed\.)?live\.huffingtonpost\.com/
														
 
															+        (?:
														
 
															+            r/segment/[^/]+/|
														
 
															+            HPLEmbedPlayer/\?segmentId=
														
 
															+        )
														
 
															+        (?P<id>[0-9a-f]+)'''
														
 
															+
														
 
															+    _TEST = {
														
 
															+        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
														
 
															+        'file': '52dd3e4b02a7602131000677.mp4',
														
 
															+        'md5': 'TODO',
														
 
															+        'info_dict': {
														
 
															+            'title': 'TODO',
														
 
															+            'description': 'TODO',
														
 
															+            'duration': 1549,
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    def _real_extract(self, url):
														
 
															+        mobj = re.match(self._VALID_URL, url)
														
 
															+        video_id = mobj.group('id')
														
 
															+
														
 
															+        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
														
 
															+        data = self._download_json(api_url, video_id)['data']
														
 
															+
														
 
															+        video_title = data['title']
														
 
															+        duration = parse_duration(data['running_time'])
														
 
															+        upload_date = unified_strdate(data['schedule']['started_at'])
														
 
															+
														
 
															+        thumbnails = []
														
 
															+        for url in data['images'].values():
														
 
															+            m = re.match('.*-([0-9]+x[0-9]+)\.', url)
														
 
															+            if not m:
														
 
															+                continue
														
 
															+            thumbnails.append({
														
 
															+                'url': url,
														
 
															+                'resolution': m.group(1),
														
 
															+            })
														
 
															+
														
 
															+        formats = [{
														
 
															+            'format': key,
														
 
															+            'format_id': key.replace('/', '.'),
														
 
															+            'ext': 'mp4',
														
 
															+            'url': url,
														
 
															+            'vcodec': 'none' if key.startswith('audio/') else None,
														
 
															+        } for key, url in data['sources']['live'].items()]
														
 
															+        self._sort_formats(formats)
														
 
															+
														
 
															+        return {
														
 
															+            'id': video_id,
														
 
															+            'title': video_title,
														
 
															+            'formats': formats,
														
 
															+            'duration': duration,
														
 
															+            'upload_date': upload_date,
														
 
															+            'thumbnails': thumbnails,
														
 
															+        }