Browse Source

Merge remote-tracking branch 'upstream/master'

Allan Zhou 12 years ago
parent
commit
90d3989b99

+ 2 - 2
devscripts/youtube_genalgo.py

@@ -26,9 +26,9 @@ tests = [
     # 84
     # 84
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
      "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
      "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
-    # 83 - vflcaqGO8 2013/07/11
+    # 83 - vflTWC9KW 2013/08/01
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
-     "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
+     "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
     # 82
     # 82
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
      "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
      "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),

+ 1 - 0
test/test_all_urls.py

@@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
+        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
 
 
     def test_no_duplicates(self):
     def test_no_duplicates(self):
         ies = gen_extractors()
         ies = gen_extractors()

+ 0 - 79
test/test_youtube_sig.py

@@ -1,79 +0,0 @@
-#!/usr/bin/env python
-
-import unittest
-import sys
-
-# Allow direct execution
-import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from youtube_dl.extractor.youtube import YoutubeIE
-from helper import FakeYDL
-
-ie = YoutubeIE(FakeYDL())
-sig = ie._decrypt_signature
-sig_age_gate = ie._decrypt_signature_age_gate
-
-class TestYoutubeSig(unittest.TestCase):
-    def test_92(self):
-        wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
-        right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
-        self.assertEqual(sig(wrong), right)
-
-    def test_90(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
-        right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
-        self.assertEqual(sig(wrong), right)
-
-    def test_88(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
-        right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
-        self.assertEqual(sig(wrong), right)
-
-    def test_87(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
-        right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"
-        self.assertEqual(sig(wrong), right)
-
-    def test_86(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
-        right = ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"
-        self.assertEqual(sig(wrong), right)
-
-    def test_85(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
-        right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
-        self.assertEqual(sig(wrong), right)
-
-    def test_84(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
-        right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
-        self.assertEqual(sig(wrong), right)
-
-    def test_83(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
-        right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
-        self.assertEqual(sig(wrong), right)
-
-    def test_82(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
-        right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
-        self.assertEqual(sig(wrong), right)
-
-    def test_81(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
-        right = "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"
-        self.assertEqual(sig(wrong), right)
-
-    def test_79(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/"
-        right = "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"
-        self.assertEqual(sig(wrong), right)
-    
-    def test_86_age_gate(self):
-        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
-        right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
-        self.assertEqual(sig_age_gate(wrong), right)
-
-if __name__ == '__main__':
-    unittest.main()

+ 2 - 2
youtube_dl/YoutubeDL.py

@@ -264,7 +264,7 @@ class YoutubeDL(object):
             self.report_error(u'Erroneous output template')
             self.report_error(u'Erroneous output template')
             return None
             return None
         except ValueError as err:
         except ValueError as err:
-            self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
+            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
             return None
             return None
 
 
     def _match_entry(self, info_dict):
     def _match_entry(self, info_dict):
@@ -547,7 +547,7 @@ class YoutubeDL(object):
                 try:
                 try:
                     success = self.fd._do_download(filename, info_dict)
                     success = self.fd._do_download(filename, info_dict)
                 except (OSError, IOError) as err:
                 except (OSError, IOError) as err:
-                    raise UnavailableVideoError()
+                    raise UnavailableVideoError(err)
                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                     self.report_error(u'unable to download video data: %s' % str(err))
                     self.report_error(u'unable to download video data: %s' % str(err))
                     return
                     return

+ 4 - 0
youtube_dl/extractor/__init__.py

@@ -45,15 +45,18 @@ from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
 from .metacafe import MetacafeIE
 from .mixcloud import MixcloudIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
 from .mtv import MTVIE
+from .muzu import MuzuTVIE
 from .myspass import MySpassIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvideo import MyVideoIE
 from .nba import NBAIE
 from .nba import NBAIE
+from .ooyala import OoyalaIE
 from .photobucket import PhotobucketIE
 from .photobucket import PhotobucketIE
 from .pornotube import PornotubeIE
 from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
 from .ringtv import RingTVIE
 from .roxwel import RoxwelIE
 from .roxwel import RoxwelIE
+from .rtlnow import RTLnowIE
 from .sina import SinaIE
 from .sina import SinaIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .spiegel import SpiegelIE
@@ -72,6 +75,7 @@ from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vevo import VevoIE
+from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .vine import VineIE
 from .c56 import C56IE
 from .c56 import C56IE

+ 30 - 2
youtube_dl/extractor/arte.py

@@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
     """
     """
     _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
     _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
     _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
     _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
+    _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
     _LIVE_URL = r'index-[0-9]+\.html$'
     _LIVE_URL = r'index-[0-9]+\.html$'
 
 
     IE_NAME = u'arte.tv'
     IE_NAME = u'arte.tv'
 
 
     @classmethod
     @classmethod
     def suitable(cls, url):
     def suitable(cls, url):
-        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
+        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
 
 
     # TODO implement Live Stream
     # TODO implement Live Stream
     # from ..utils import compat_urllib_parse
     # from ..utils import compat_urllib_parse
@@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
             lang = mobj.group('lang')
             lang = mobj.group('lang')
             return self._extract_video(url, id, lang)
             return self._extract_video(url, id, lang)
 
 
+        mobj = re.match(self._LIVEWEB_URL, url)
+        if mobj is not None:
+            name = mobj.group('name')
+            lang = mobj.group('lang')
+            return self._extract_liveweb(url, name, lang)
+
         if re.search(self._LIVE_URL, video_id) is not None:
         if re.search(self._LIVE_URL, video_id) is not None:
             raise ExtractorError(u'Arte live streams are not yet supported, sorry')
             raise ExtractorError(u'Arte live streams are not yet supported, sorry')
             # self.extractLiveStream(url)
             # self.extractLiveStream(url)
@@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
 
 
         info_dict = {'id': player_info['VID'],
         info_dict = {'id': player_info['VID'],
                      'title': player_info['VTI'],
                      'title': player_info['VTI'],
-                     'description': player_info['VDE'],
+                     'description': player_info.get('VDE'),
                      'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
                      'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
                      'thumbnail': player_info['programImage'],
                      'thumbnail': player_info['programImage'],
                      'ext': 'flv',
                      'ext': 'flv',
@@ -104,6 +111,8 @@ class ArteTvIE(InfoExtractor):
         formats = filter(_match_lang, formats)
         formats = filter(_match_lang, formats)
         # We order the formats by quality
         # We order the formats by quality
         formats = sorted(formats, key=lambda f: int(f['height']))
         formats = sorted(formats, key=lambda f: int(f['height']))
+        # Prefer videos without subtitles in the same language
+        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
         # Pick the best quality
         # Pick the best quality
         format_info = formats[-1]
         format_info = formats[-1]
         if format_info['mediaType'] == u'rtmp':
         if format_info['mediaType'] == u'rtmp':
@@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
                 'url': video_url,
                 'url': video_url,
                 'ext': 'flv',
                 'ext': 'flv',
                 }
                 }
+
+    def _extract_liveweb(self, url, name, lang):
+        """Extract from http://liveweb.arte.tv/ and return an info dict (id, title, url, ext, thumbnail)."""
+        webpage = self._download_webpage(url, name)
+        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')  # numeric event id found in the page
+        config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
+                                            video_id, u'Downloading information')
+        config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
+        event_doc = config_doc.find('event')
+        url_node = event_doc.find('video').find('urlHd')
+        if url_node is None:
+            url_node = event_doc.find('video').find('urlSd')  # fall back to the SD url when no HD url is present
+
+        return {'id': video_id,
+                'title': event_doc.find('name%s' % lang.capitalize()).text,  # element name is built from the capitalized language code
+                'url': url_node.text.replace('MP4', 'mp4'),
+                'ext': 'flv',
+                'thumbnail': self._og_search_thumbnail(webpage),
+                }

+ 1 - 1
youtube_dl/extractor/collegehumor.py

@@ -10,7 +10,7 @@ from ..utils import (
 
 
 
 
 class CollegeHumorIE(InfoExtractor):
 class CollegeHumorIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
 
 
     _TEST = {
     _TEST = {
         u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
         u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',

+ 64 - 0
youtube_dl/extractor/muzu.py

@@ -0,0 +1,64 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    determine_ext,
+)
+
+
+class MuzuTVIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'  # dots escaped so only the literal host name matches
+    IE_NAME = u'muzu.tv'
+
+    _TEST = {
+        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
+        u'file': u'1981454.mp4',
+        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
+        u'info_dict': {
+            u'title': u'Cat Walk (Original Mix)',
+            u'description': u'md5:90e868994de201b2570e4e5854e19420',
+            u'uploader': u'MarcAshken featuring SOS',
+        },
+    }
+
+    def _real_extract(self, url):  # returns a single info dict built from the oembed, playerInit and requestVideo responses
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        info_data = compat_urllib_parse.urlencode({'format': 'json',
+                                                   'url': url,
+                                                   })
+        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
+                                                 video_id, u'Downloading video info')
+        info = json.loads(video_info_page)  # supplies title, thumbnail_url, description and author_name below
+
+        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
+                                                  video_id, u'Downloading player info')
+        video_info = json.loads(player_info_page)['videos'][0]
+        for quality in ['1080' , '720', '480', '360']:  # highest first; stays at '360' when no v<quality> entry is set
+            if video_info.get('v%s' % quality):
+                break
+
+        data = compat_urllib_parse.urlencode({'ai': video_id,
+                                              # Even if each time you watch a video the hash changes,
+                                              # it seems to work for different videos, and it will work
+                                              # even if you use any non empty string as a hash
+                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
+                                              'device': 'web',
+                                              'qv': quality,
+                                              })
+        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
+                                                video_id, u'Downloading video url')
+        video_url_info = json.loads(video_url_page)
+        video_url = video_url_info['url']
+
+        return {'id': video_id,
+                'title': info['title'],
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'thumbnail': info['thumbnail_url'],
+                'description': info['description'],
+                'uploader': info['author_name'],
+                }

+ 17 - 1
youtube_dl/extractor/myvideo.py

@@ -2,11 +2,13 @@ import binascii
 import base64
 import base64
 import hashlib
 import hashlib
 import re
 import re
+import json
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     compat_ord,
     compat_ord,
     compat_urllib_parse,
     compat_urllib_parse,
+    compat_urllib_request,
 
 
     ExtractorError,
     ExtractorError,
 )
 )
@@ -16,7 +18,7 @@ from ..utils import (
 class MyVideoIE(InfoExtractor):
 class MyVideoIE(InfoExtractor):
     """Information Extractor for myvideo.de."""
     """Information Extractor for myvideo.de."""
 
 
-    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
+    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
     IE_NAME = u'myvideo'
     IE_NAME = u'myvideo'
     _TEST = {
     _TEST = {
         u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
         u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
@@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
                 'ext':      video_ext,
                 'ext':      video_ext,
             }]
             }]
 
 
+        mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
+        if mobj is not None:
+            request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
+            response = self._download_webpage(request, video_id,
+                                              u'Downloading video info')
+            info = json.loads(base64.b64decode(response).decode('utf-8'))
+            return {'id': video_id,
+                    'title': info['title'],
+                    'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
+                    'play_path': info['filename'],
+                    'ext': 'flv',
+                    'thumbnail': info['thumbnail'][0]['url'],
+                    }
+
         # try encxml
         # try encxml
         mobj = re.search('var flashvars={(.+?)}', webpage)
         mobj = re.search('var flashvars={(.+?)}', webpage)
         if mobj is None:
         if mobj is None:

+ 52 - 0
youtube_dl/extractor/ooyala.py

@@ -0,0 +1,52 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+class OoyalaIE(InfoExtractor):
+    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'  # the embedCode query value is used as the video id
+
+    _TEST = {
+        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
+        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
+        u'info_dict': {
+            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
+            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+        },
+    }
+
+    def _extract_result(self, info, more_info):  # build one video info dict from a stream entry and its metadata entry
+        return {'id': info['embedCode'],
+                'ext': 'mp4',
+                'title': unescapeHTML(info['title']),
+                'url': info['url'],
+                'description': unescapeHTML(more_info['description']),
+                'thumbnail': more_info['promo'],
+                }
+
+    def _real_extract(self, url):  # returns a playlist dict when a 'lineup' is present, else a single video dict
+        mobj = re.match(self._VALID_URL, url)
+        embedCode = mobj.group('id')
+        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
+        player = self._download_webpage(player_url, embedCode)
+        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
+                                        player, u'mobile player url')
+        mobile_player = self._download_webpage(mobile_url, embedCode)
+        videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')  # un-escape quotes so the captured text parses as JSON
+        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')  # same un-escaping for the metadata object
+        videos_info = json.loads(videos_info)
+        videos_more_info =json.loads(videos_more_info)
+
+        if videos_more_info.get('lineup'):
+            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]  # entries are paired by position
+            return {'_type': 'playlist',
+                    'id': embedCode,
+                    'title': unescapeHTML(videos_more_info['title']),
+                    'entries': videos,
+                    }
+        else:
+            return self._extract_result(videos_info[0], videos_more_info)
+        

+ 100 - 0
youtube_dl/extractor/rtlnow.py

@@ -0,0 +1,100 @@
+# encoding: utf-8
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+class RTLnowIE(InfoExtractor):
+    """Information Extractor for RTLnow, RTL2now and VOXnow"""
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    _TESTS = [{
+        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
+        u'file': u'90419.flv',
+        u'info_dict': {
+            u'upload_date': u'20070416', 
+            u'title': u'Ahornallee - Folge 1 - Der Einzug',
+            u'description': u'Folge 1 - Der Einzug',
+        },
+        u'params': {
+            u'skip_download': True,
+        },
+    },
+    {
+        u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
+        u'file': u'69756.flv',
+        u'info_dict': {
+            u'upload_date': u'20120519', 
+            u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
+            u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
+            u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
+        },
+        u'params': {
+            u'skip_download': True,
+        },
+    },
+    {
+        u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
+        u'file': u'13883.flv',
+        u'info_dict': {
+            u'upload_date': u'20090627', 
+            u'title': u'Voxtours - Südafrika-Reporter II',
+            u'description': u'Südafrika-Reporter II',
+        },
+        u'params': {
+            u'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self,url):  # returns a one-element list holding the info dict for the rtmpe stream
+        mobj = re.match(self._VALID_URL, url)
+
+        webpage_url = u'http://' + mobj.group('url')  # rebuilt with an explicit scheme, which the pattern treats as optional
+        video_page_url = u'http://' + mobj.group('base_url')  # site root; also the base of the player url below
+        video_id = mobj.group(u'video_id')
+
+        webpage = self._download_webpage(webpage_url, video_id)
+        video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
+            webpage, u'title')
+        playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
+            webpage, u'playerdata_url')
+
+        playerdata = self._download_webpage(playerdata_url, video_id)
+        mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
+        if mobj:
+            video_description = mobj.group(u'description')
+            if mobj.group('upload_date_Y'):
+                video_upload_date = mobj.group('upload_date_Y')
+            else:
+                video_upload_date = u'20' + mobj.group('upload_date_y')  # two-digit years are taken to be 20xx
+            video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d')  # assembled as YYYYMMDD
+        else:
+            video_description = None
+            video_upload_date = None
+            self._downloader.report_warning(u'Unable to extract description and upload date')
+
+        # Thumbnail: not every video has a thumbnail
+        mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
+        if mobj:
+            video_thumbnail = mobj.group(u'thumbnail')
+        else:
+            video_thumbnail = None
+
+        mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        video_url = mobj.group(u'url')  # rtmpe:// prefix up to and including the second path segment
+        video_play_path = u'mp4:' + mobj.group(u'play_path')  # remainder of the CDATA url, prefixed with mp4:
+        video_player_url = video_page_url + u'includes/vodplayer.swf'
+
+        return [{
+            'id':          video_id,
+            'url':         video_url,
+            'play_path':   video_play_path,
+            'page_url':    video_page_url,
+            'player_url':  video_player_url,
+            'ext':         'flv',
+            'title':       video_title,
+            'description': video_description,
+            'upload_date': video_upload_date,
+            'thumbnail':   video_thumbnail,
+        }]

+ 2 - 2
youtube_dl/extractor/vevo.py

@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class VevoIE(InfoExtractor):
 class VevoIE(InfoExtractor):
     """
     """
-    Accecps urls from vevo.com or in the format 'vevo:{id}'
+    Accepts urls from vevo.com or in the format 'vevo:{id}'
     (currently used by MTVIE)
     (currently used by MTVIE)
     """
     """
     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
@@ -19,7 +19,7 @@ class VevoIE(InfoExtractor):
         u'info_dict': {
         u'info_dict': {
             u"upload_date": u"20130624", 
             u"upload_date": u"20130624", 
             u"uploader": u"Hurts", 
             u"uploader": u"Hurts", 
-            u"title": u"Somebody To Die For"
+            u"title": u"Somebody to Die For"
         }
         }
     }
     }
 
 

+ 49 - 0
youtube_dl/extractor/videofyme.py

@@ -0,0 +1,49 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+    determine_ext,
+)
+
+class VideofyMeIE(InfoExtractor):
+    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'  # dots escaped so only the literal host names match
+    IE_NAME = u'videofy.me'
+
+    _TEST = {
+        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
+        u'file':  u'1100701.mp4',
+        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
+        u'info_dict': {
+            u'title': u'This is VideofyMe',
+            u'description': None,
+            u'uploader': u'VideofyMe',
+            u'uploader_id': u'thisisvideofyme',
+        },
+        
+    }
+
+    def _real_extract(self, url):  # returns a single info dict read from the sunshine.videofy.me config XML
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
+                                            video_id)
+        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
+        video = config.find('video')
+        sources = video.find('sources')
+        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')  # try the source with id 'HQ on' first
+        if url_node is None:
+            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')  # otherwise use the source with id 'HQ off'
+        video_url = url_node.find('url').text
+
+        return {'id': video_id,
+                'title': video.find('title').text,
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'thumbnail': video.find('thumb').text,
+                'description': video.find('description').text,
+                'uploader': config.find('blog/name').text,
+                'uploader_id': video.find('identifier').text,
+                'view_count': re.search(r'\d+', video.find('views').text).group(),  # first run of digits in the views text, kept as a string
+                }

+ 17 - 11
youtube_dl/extractor/youtube.py

@@ -141,7 +141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          (?:                                                  # the various things that can precede the ID:
                          (?:                                                  # the various things that can precede the ID:
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                              |(?:                                             # or the v= param in all its forms
                              |(?:                                             # or the v= param in all its forms
-                                 (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  v=
                                  v=
@@ -221,6 +221,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '132': '240p',
         '132': '240p',
         '151': '72p',
         '151': '72p',
     }
     }
+    _3d_itags = ['85', '84', '102', '83', '101', '82', '100']
     IE_NAME = u'youtube'
     IE_NAME = u'youtube'
     _TESTS = [
     _TESTS = [
         {
         {
@@ -334,18 +335,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
         elif len(s) == 90:
         elif len(s) == 90:
             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
+        elif len(s) == 89:
+            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
         elif len(s) == 88:
         elif len(s) == 88:
             return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
             return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
         elif len(s) == 87:
         elif len(s) == 87:
-            return s[4:23] + s[86] + s[24:85]
+            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
         elif len(s) == 86:
         elif len(s) == 86:
-            return s[83:85] + s[26] + s[79:46:-1] + s[85] + s[45:36:-1] + s[30] + s[35:30:-1] + s[46] + s[29:26:-1] + s[82] + s[25:1:-1]
+            return s[5:20] + s[2] + s[21:]
         elif len(s) == 85:
         elif len(s) == 85:
-            return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
+            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
         elif len(s) == 84:
         elif len(s) == 84:
-            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
+            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
         elif len(s) == 83:
         elif len(s) == 83:
-            return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:]
+            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
         elif len(s) == 82:
         elif len(s) == 82:
             return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
             return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
         elif len(s) == 81:
         elif len(s) == 81:
@@ -467,7 +470,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _print_formats(self, formats):
     def _print_formats(self, formats):
         print('Available formats:')
         print('Available formats:')
         for x in formats:
         for x in formats:
-            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
+            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
+                                        self._video_dimensions.get(x, '???'),
+                                        ' (3D)' if x in self._3d_itags else ''))
 
 
     def _extract_id(self, url):
     def _extract_id(self, url):
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -715,8 +720,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             s = url_data['s'][0]
                             s = url_data['s'][0]
                             if age_gate:
                             if age_gate:
                                 player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                 player_version = self._search_regex(r'ad3-(.+?)\.swf',
-                                    video_info['ad3_module'][0], 'flash player',
-                                    fatal=False)
+                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
+                                    'flash player', fatal=False)
                                 player = 'flash player %s' % player_version
                                 player = 'flash player %s' % player_version
                             else:
                             else:
                                 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
@@ -751,8 +756,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             # Extension
             # Extension
             video_extension = self._video_extensions.get(format_param, 'flv')
             video_extension = self._video_extensions.get(format_param, 'flv')
 
 
-            video_format = '{0} - {1}'.format(format_param if format_param else video_extension,
-                                              self._video_dimensions.get(format_param, '???'))
+            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
+                                              self._video_dimensions.get(format_param, '???'),
+                                              ' (3D)' if format_param in self._3d_itags else '')
 
 
             results.append({
             results.append({
                 'id':       video_id,
                 'id':       video_id,

+ 2 - 2
youtube_dl/utils.py

@@ -207,7 +207,7 @@ if sys.version_info >= (2,7):
     def find_xpath_attr(node, xpath, key, val):
     def find_xpath_attr(node, xpath, key, val):
         """ Find the xpath xpath[@key=val] """
         """ Find the xpath xpath[@key=val] """
         assert re.match(r'^[a-zA-Z]+$', key)
         assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z@]*$', val)
+        assert re.match(r'^[a-zA-Z@\s]*$', val)
         expr = xpath + u"[@%s='%s']" % (key, val)
         expr = xpath + u"[@%s='%s']" % (key, val)
         return node.find(expr)
         return node.find(expr)
 else:
 else:
@@ -497,7 +497,7 @@ class ExtractorError(Exception):
         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
             expected = True
             expected = True
         if not expected:
         if not expected:
-            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
         super(ExtractorError, self).__init__(msg)
         super(ExtractorError, self).__init__(msg)
 
 
         self.traceback = tb
         self.traceback = tb

+ 1 - 1
youtube_dl/version.py

@@ -1,2 +1,2 @@
 
 
-__version__ = '2013.07.31'
+__version__ = '2013.08.17'