ソースを参照

Merge remote-tracking branch 'origin/master'

Philipp Hagemeister 11 年 前
コミット
45c85d7ba1

+ 5 - 0
youtube_dl/downloader/f4m.py

@@ -16,6 +16,7 @@ from ..utils import (
     format_bytes,
     encodeFilename,
     sanitize_open,
+    xpath_text,
 )
 
 
@@ -251,6 +252,8 @@ class F4mFD(FileDownloader):
             # We only download the first fragment
             fragments_list = fragments_list[:1]
         total_frags = len(fragments_list)
+        # For some akamai manifests we'll need to add a query to the fragment url
+        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
 
         tmpfilename = self.temp_name(filename)
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
@@ -290,6 +293,8 @@ class F4mFD(FileDownloader):
         for (seg_i, frag_i) in fragments_list:
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             url = base_url + name
+            if akamai_pv:
+                url += '?' + akamai_pv.strip(';')
             frag_filename = '%s-%s' % (tmpfilename, name)
             success = http_dl.download(frag_filename, {'url': url})
             if not success:

+ 4 - 1
youtube_dl/extractor/__init__.py

@@ -249,7 +249,10 @@ from .nosvideo import NosVideoIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
-from .npo import NPOIE
+from .npo import (
+    NPOIE,
+    TegenlichtVproIE,
+)
 from .nrk import (
     NRKIE,
     NRKTVIE,

+ 2 - 2
youtube_dl/extractor/nbc.py

@@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
-        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
+        # md5 checksum is not stable
         'info_dict': {
-            'id': 'u1RInQZRN7QJ',
+            'id': 'bTmnLCvIbaaH',
             'ext': 'flv',
             'title': 'I Am a Firefighter',
             'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',

+ 30 - 0
youtube_dl/extractor/npo.py

@@ -7,6 +7,7 @@ from ..utils import (
     unified_strdate,
     parse_duration,
     qualities,
+    url_basename,
 )
 
 
@@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        return self._get_info(video_id)
 
+    def _get_info(self, video_id):
         metadata = self._download_json(
             'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
             video_id,
@@ -106,3 +109,30 @@ class NPOIE(InfoExtractor):
             'duration': parse_duration(metadata.get('tijdsduur')),
             'formats': formats,
         }
+
+
+class TegenlichtVproIE(NPOIE):
+    IE_NAME = 'tegenlicht.vpro.nl'
+    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
+
+    _TESTS = [
+        {
+            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
+            'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+            'info_dict': {
+                'id': 'VPWON_1169289',
+                'ext': 'm4v',
+                'title': 'Tegenlicht',
+                'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
+                'upload_date': '20130225',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        name = url_basename(url)
+        webpage = self._download_webpage(url, name)
+        urn = self._html_search_meta('mediaurn', webpage)
+        info_page = self._download_json(
+            'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
+        return self._get_info(info_page['mid'])

+ 1 - 1
youtube_dl/extractor/sbs.py

@@ -21,7 +21,7 @@ class SBSIE(InfoExtractor):
         'md5': '3150cf278965eeabb5b4cea1c963fe0a',
         'info_dict': {
             'id': '320403011771',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Dingo Conservation',
             'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
             'thumbnail': 're:http://.*\.jpg',

+ 31 - 22
youtube_dl/extractor/theplatform.py

@@ -5,6 +5,7 @@ import json
 
 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
     ExtractorError,
     xpath_with_ns,
 )
@@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor):
         body = meta.find(_x('smil:body'))
 
         f4m_node = body.find(_x('smil:seq//smil:video'))
-        if f4m_node is not None:
+        if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
             f4m_url = f4m_node.attrib['src']
             if 'manifest.f4m?' not in f4m_url:
                 f4m_url += '?'
             # the parameters are from syfy.com, other sites may use others,
             # they also work for nbc.com
             f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
-            formats = [{
-                'ext': 'flv',
-                'url': f4m_url,
-            }]
+            formats = self._extract_f4m_formats(f4m_url, video_id)
         else:
-            base_url = head.find(_x('smil:meta')).attrib['base']
-            switch = body.find(_x('smil:switch'))
             formats = []
-            for f in switch.findall(_x('smil:video')):
-                attr = f.attrib
-                width = int(attr['width'])
-                height = int(attr['height'])
-                vbr = int(attr['system-bitrate']) // 1000
-                format_id = '%dx%d_%dk' % (width, height, vbr)
-                formats.append({
-                    'format_id': format_id,
-                    'url': base_url,
-                    'play_path': 'mp4:' + attr['src'],
-                    'ext': 'flv',
-                    'width': width,
-                    'height': height,
-                    'vbr': vbr,
-                })
+            switch = body.find(_x('smil:switch'))
+            if switch is not None:
+                base_url = head.find(_x('smil:meta')).attrib['base']
+                for f in switch.findall(_x('smil:video')):
+                    attr = f.attrib
+                    width = int(attr['width'])
+                    height = int(attr['height'])
+                    vbr = int(attr['system-bitrate']) // 1000
+                    format_id = '%dx%d_%dk' % (width, height, vbr)
+                    formats.append({
+                        'format_id': format_id,
+                        'url': base_url,
+                        'play_path': 'mp4:' + attr['src'],
+                        'ext': 'flv',
+                        'width': width,
+                        'height': height,
+                        'vbr': vbr,
+                    })
+            else:
+                switch = body.find(_x('smil:seq//smil:switch'))
+                for f in switch.findall(_x('smil:video')):
+                    attr = f.attrib
+                    vbr = int(attr['system-bitrate']) // 1000
+                    formats.append({
+                        'format_id': compat_str(vbr),
+                        'url': attr['src'],
+                        'vbr': vbr,
+                    })
             self._sort_formats(formats)
 
         return {