Selaa lähdekoodia

Merge remote-tracking branch 'upstream/master'

rupertbaxter2 11 vuotta sitten
vanhempi
sitoutus
f96252b913

+ 1 - 0
test/test_all_urls.py

@@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):
 
     def test_facebook_matching(self):
         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
 
     def test_no_duplicates(self):
         ies = gen_extractors()

+ 1 - 1
test/test_utils.py

@@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, [{"id": "532cb", "x": 3}])
 
-    def test_uppercase_escpae(self):
+    def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape(u'aä'), u'aä')
         self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
 

+ 1 - 0
youtube_dl/__init__.py

@@ -68,6 +68,7 @@ __authors__  = (
     'Hassaan Ali',
     'Dobrosław Żybort',
     'David Fabijan',
+    'Sebastian Haas',
 )
 
 __license__ = 'Public Domain'

+ 1 - 1
youtube_dl/downloader/common.py

@@ -295,7 +295,7 @@ class FileDownloader(object):
 
     def real_download(self, filename, info_dict):
         """Real download process. Redefine in subclasses."""
-        raise NotImplementedError(u'This method must be implemented by sublcasses')
+        raise NotImplementedError(u'This method must be implemented by subclasses')
 
     def _hook_progress(self, status):
         for ph in self._progress_hooks:

+ 5 - 2
youtube_dl/extractor/__init__.py

@@ -225,9 +225,12 @@ from .nrk import (
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
-from .oe1 import OE1IE
 from .ooyala import OoyalaIE
-from .orf import ORFIE
+from .orf import (
+    ORFTVthekIE,
+    ORFOE1IE,
+    ORFFM4IE,
+)
 from .parliamentliveuk import ParliamentLiveUKIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE

+ 3 - 2
youtube_dl/extractor/appletrailers.py

@@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
     compat_urlparse,
+    int_or_none,
 )
 
 
@@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
                 formats.append({
                     'url': format_url,
                     'format': format['type'],
-                    'width': format['width'],
-                    'height': int(format['height']),
+                    'width': int_or_none(format['width']),
+                    'height': int_or_none(format['height']),
                 })
 
             self._sort_formats(formats)

+ 3 - 0
youtube_dl/extractor/ard.py

@@ -51,6 +51,9 @@ class ARDIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
+            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
         title = self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
              r'<meta name="dcterms.title" content="(.*?)"/>',

+ 9 - 5
youtube_dl/extractor/arte.py

@@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
             return any(re.match(r, f['versionCode']) for r in regexes)
         # Some formats may not be in the same language as the url
+        # TODO: Might want not to drop videos that does not match requested language
+        # but to process those formats with lower precedence
         formats = filter(_match_lang, all_formats)
-        formats = list(formats) # in python3 filter returns an iterator
+        formats = list(formats)  # in python3 filter returns an iterator
         if not formats:
             # Some videos are only available in the 'Originalversion'
             # they aren't tagged as being in French or German
-            if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
-                formats = all_formats
-            else:
-                raise ExtractorError(u'The formats list is empty')
+            # Sometimes there are neither videos of requested lang code
+            # nor original version videos available
+            # For such cases we just take all_formats as is
+            formats = all_formats
+            if not formats:
+                raise ExtractorError('The formats list is empty')
 
         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
             def sort_key(f):

+ 1 - 1
youtube_dl/extractor/facebook.py

@@ -20,7 +20,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         https?://(?:\w+\.)?facebook\.com/
-        (?:[^#?]*\#!/)?
+        (?:[^#]*?\#!/)?
         (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
         (?:v|video_id)=(?P<id>[0-9]+)
         (?:.*)'''

+ 0 - 40
youtube_dl/extractor/oe1.py

@@ -1,40 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import calendar
-import datetime
-import re
-
-from .common import InfoExtractor
-
-# audios on oe1.orf.at are only available for 7 days, so we can't
-# add tests.
-
-
-class OE1IE(InfoExtractor):
-    IE_DESC = 'oe1.orf.at'
-    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_id = mobj.group('id')
-
-        data = self._download_json(
-            'http://oe1.orf.at/programm/%s/konsole' % show_id,
-            show_id
-        )
-
-        timestamp = datetime.datetime.strptime('%s %s' % (
-            data['item']['day_label'],
-            data['item']['time']
-        ), '%d.%m.%Y %H:%M')
-        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
-
-        return {
-            'id': show_id,
-            'title': data['item']['title'],
-            'url': data['item']['url_stream'],
-            'ext': 'mp3',
-            'description': data['item'].get('info'),
-            'timestamp': unix_timestamp
-        }

+ 49 - 17
youtube_dl/extractor/ooyala.py

@@ -3,23 +3,38 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import (
+    unescapeHTML,
+    ExtractorError,
+)
 
 
 class OoyalaIE(InfoExtractor):
     _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
 
-    _TEST = {
-        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
-        'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-        'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
-        'info_dict': {
-            'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-            'ext': 'mp4',
-            'title': 'Explaining Data Recovery from Hard Drives and SSDs',
-            'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+    _TESTS = [
+        {
+            # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+            'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+            'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
+            'info_dict': {
+                'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+                'ext': 'mp4',
+                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+            },
+        }, {
+            # Only available for ipad
+            'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+            'md5': '4b9754921fddb68106e48c142e2a01e6',
+            'info_dict': {
+                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+                'ext': 'mp4',
+                'title': 'Simulation Overview - Levels of Simulation',
+                'description': '',
+            },
         },
-    }
+    ]
 
     @staticmethod
     def _url_for_embed_code(embed_code):
@@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor):
         player = self._download_webpage(player_url, embedCode)
         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                         player, 'mobile player url')
-        mobile_player = self._download_webpage(mobile_url, embedCode)
-        videos_info = self._search_regex(
-            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
-            mobile_player, 'info').replace('\\"','"')
-        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
+        # Looks like some videos are only available for particular devices
+        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
+        # is only available for ipad)
+        # Working around with fetching URLs for all the devices found starting with 'unknown'
+        # until we succeed or eventually fail for each device.
+        devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
+        devices.remove('unknown')
+        devices.insert(0, 'unknown')
+        for device in devices:
+            mobile_player = self._download_webpage(
+                '%s&device=%s' % (mobile_url, device), embedCode,
+                'Downloading mobile player JS for %s device' % device)
+            videos_info = self._search_regex(
+                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
+                mobile_player, 'info', fatal=False, default=None)
+            if videos_info:
+                break
+        if not videos_info:
+            raise ExtractorError('Unable to extract info')
+        videos_info = videos_info.replace('\\"', '"')
+        videos_more_info = self._search_regex(
+            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
         videos_info = json.loads(videos_info)
-        videos_more_info =json.loads(videos_more_info)
+        videos_more_info = json.loads(videos_more_info)
 
         if videos_more_info.get('lineup'):
             videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]

+ 75 - 1
youtube_dl/extractor/orf.py

@@ -3,6 +3,8 @@ from __future__ import unicode_literals
 
 import json
 import re
+import calendar
+import datetime
 
 from .common import InfoExtractor
 from ..utils import (
@@ -12,7 +14,9 @@ from ..utils import (
 )
 
 
-class ORFIE(InfoExtractor):
+class ORFTVthekIE(InfoExtractor):
+    IE_NAME = 'orf:tvthek'
+    IE_DESC = 'ORF TVthek'
     _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
 
     _TEST = {
@@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
             'entries': entries,
             'id': playlist_id,
         }
+
+
+# Audios on ORF radio are only available for 7 days, so we can't add tests.
+
+
+class ORFOE1IE(InfoExtractor):
+    IE_NAME = 'orf:oe1'
+    IE_DESC = 'Radio Österreich 1'
+    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_id = mobj.group('id')
+
+        data = self._download_json(
+            'http://oe1.orf.at/programm/%s/konsole' % show_id,
+            show_id
+        )
+
+        timestamp = datetime.datetime.strptime('%s %s' % (
+            data['item']['day_label'],
+            data['item']['time']
+        ), '%d.%m.%Y %H:%M')
+        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
+
+        return {
+            'id': show_id,
+            'title': data['item']['title'],
+            'url': data['item']['url_stream'],
+            'ext': 'mp3',
+            'description': data['item'].get('info'),
+            'timestamp': unix_timestamp
+        }
+
+
+class ORFFM4IE(InfoExtractor):
+    IE_DESC = 'orf:fm4'
+    IE_DESC = 'radio FM4'
+    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_date = mobj.group('date')
+        show_id = mobj.group('show')
+
+        data = self._download_json(
+            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
+            show_id
+        )
+
+        def extract_entry_dict(info, title, subtitle):
+            return {
+                'id': info['loopStreamId'].replace('.mp3', ''),
+                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
+                'title': title,
+                'description': subtitle,
+                'duration': (info['end'] - info['start']) / 1000,
+                'timestamp': info['start'] / 1000,
+                'ext': 'mp3'
+            }
+
+        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
+
+        return {
+            '_type': 'playlist',
+            'id': show_id,
+            'title': data['title'],
+            'description': data['subtitle'],
+            'entries': entries
+        }

+ 9 - 10
youtube_dl/extractor/reverbnation.py

@@ -1,23 +1,23 @@
 from __future__ import unicode_literals
 
 import re
-import time
 
 from .common import InfoExtractor
-from ..utils import strip_jsonp
+from ..utils import str_or_none
 
 
 class ReverbNationIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
     _TESTS = [{
         'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
-        'file': '16965047.mp3',
         'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
         'info_dict': {
+            "id": "16965047",
+            "ext": "mp3",
             "title": "MONA LISA",
             "uploader": "ALKILADOS",
-            "uploader_id": 216429,
-            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
+            "uploader_id": "216429",
+            "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
         },
     }]
 
@@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
         song_id = mobj.group('id')
 
         api_res = self._download_json(
-            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
-                % (song_id, int(time.time() * 1000)),
+            'https://api.reverbnation.com/song/%s' % song_id,
             song_id,
-            transform_source=strip_jsonp,
             note='Downloading information of song %s' % song_id
         )
 
@@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
             'title': api_res.get('name'),
             'url': api_res.get('url'),
             'uploader': api_res.get('artist', {}).get('name'),
-            'uploader_id': api_res.get('artist', {}).get('id'),
-            'thumbnail': api_res.get('image', api_res.get('thumbnail')),
+            'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
+            'thumbnail': self._proto_relative_url(
+                api_res.get('image', api_res.get('thumbnail'))),
             'ext': 'mp3',
             'vcodec': 'none',
         }

+ 6 - 0
youtube_dl/utils.py

@@ -1273,9 +1273,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
     if get_attr:
         if v is not None:
             v = getattr(v, get_attr, None)
+    if v == '':
+        v = None
     return default if v is None else (int(v) * invscale // scale)
 
 
+def str_or_none(v, default=None):
+    return default if v is None else compat_str(v)
+
+
 def str_to_int(int_str):
     if int_str is None:
         return None

+ 1 - 1
youtube_dl/version.py

@@ -1,2 +1,2 @@
 
-__version__ = '2014.08.05'
+__version__ = '2014.08.10'