Browse Source

Merge branch 'master' into rtmpdump

Conflicts:
	youtube_dl/FileDownloader.py

Merge
rzhxeo 11 years ago
parent
commit
2b35c9ef74
64 changed files with 1653 additions and 638 deletions
  1. 4 2
      README.md
  2. 1 0
      setup.py
  3. 17 0
      test/helper.py
  4. 16 4
      test/test_YoutubeDL.py
  5. 0 70
      test/test_dailymotion_subtitles.py
  6. 52 23
      test/test_download.py
  7. 18 0
      test/test_playlists.py
  8. 211 0
      test/test_subtitles.py
  9. 0 95
      test/test_youtube_subtitles.py
  10. 42 33
      youtube_dl/FileDownloader.py
  11. 1 1
      youtube_dl/PostProcessor.py
  12. 126 32
      youtube_dl/YoutubeDL.py
  13. 58 55
      youtube_dl/__init__.py
  14. 16 2
      youtube_dl/extractor/__init__.py
  15. 35 17
      youtube_dl/extractor/arte.py
  16. 81 0
      youtube_dl/extractor/bambuser.py
  17. 49 12
      youtube_dl/extractor/brightcove.py
  18. 4 2
      youtube_dl/extractor/canalc2.py
  19. 3 1
      youtube_dl/extractor/cinemassacre.py
  20. 1 1
      youtube_dl/extractor/cnn.py
  21. 21 6
      youtube_dl/extractor/common.py
  22. 23 8
      youtube_dl/extractor/dailymotion.py
  23. 1 1
      youtube_dl/extractor/depositfiles.py
  24. 37 0
      youtube_dl/extractor/eitb.py
  25. 2 0
      youtube_dl/extractor/exfm.py
  26. 50 0
      youtube_dl/extractor/extremetube.py
  27. 38 0
      youtube_dl/extractor/gamekings.py
  28. 23 5
      youtube_dl/extractor/generic.py
  29. 2 2
      youtube_dl/extractor/hypem.py
  30. 6 1
      youtube_dl/extractor/kankan.py
  31. 4 4
      youtube_dl/extractor/keezmovies.py
  32. 47 7
      youtube_dl/extractor/livestream.py
  33. 46 5
      youtube_dl/extractor/metacafe.py
  34. 49 0
      youtube_dl/extractor/mofosex.py
  35. 4 1
      youtube_dl/extractor/mtv.py
  36. 48 0
      youtube_dl/extractor/myspace.py
  37. 3 3
      youtube_dl/extractor/pornhub.py
  38. 3 1
      youtube_dl/extractor/redtube.py
  39. 0 12
      youtube_dl/extractor/rtlnow.py
  40. 1 0
      youtube_dl/extractor/slashdot.py
  41. 51 81
      youtube_dl/extractor/soundcloud.py
  42. 19 6
      youtube_dl/extractor/southparkstudios.py
  43. 35 0
      youtube_dl/extractor/space.py
  44. 3 3
      youtube_dl/extractor/spankwire.py
  45. 36 14
      youtube_dl/extractor/spiegel.py
  46. 6 6
      youtube_dl/extractor/subtitles.py
  47. 33 8
      youtube_dl/extractor/teamcoco.py
  48. 47 25
      youtube_dl/extractor/ted.py
  49. 3 3
      youtube_dl/extractor/tube8.py
  50. 42 0
      youtube_dl/extractor/tvp.py
  51. 73 22
      youtube_dl/extractor/vevo.py
  52. 1 1
      youtube_dl/extractor/viddler.py
  53. 8 9
      youtube_dl/extractor/vimeo.py
  54. 1 1
      youtube_dl/extractor/vine.py
  55. 45 0
      youtube_dl/extractor/vk.py
  56. 1 0
      youtube_dl/extractor/weibo.py
  57. 1 1
      youtube_dl/extractor/xnxx.py
  58. 55 0
      youtube_dl/extractor/xtube.py
  59. 1 1
      youtube_dl/extractor/yahoo.py
  60. 3 3
      youtube_dl/extractor/youku.py
  61. 4 4
      youtube_dl/extractor/youporn.py
  62. 36 42
      youtube_dl/extractor/youtube.py
  63. 5 1
      youtube_dl/update.py
  64. 1 1
      youtube_dl/version.py

+ 4 - 2
README.md

@@ -92,12 +92,14 @@ which means you can modify it, redistribute it or use it however you like.
                                ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
                                ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
     --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
     --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
                                when it is present in output filename template or
                                when it is present in output filename template or
-                               --autonumber option is given
+                               --auto-number option is given
     --restrict-filenames       Restrict filenames to only ASCII characters, and
     --restrict-filenames       Restrict filenames to only ASCII characters, and
                                avoid "&" and spaces in filenames
                                avoid "&" and spaces in filenames
     -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
     -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
     -w, --no-overwrites        do not overwrite files
     -w, --no-overwrites        do not overwrite files
-    -c, --continue             resume partially downloaded files
+    -c, --continue             force resume of partially downloaded files. By
+                               default, youtube-dl will resume downloads if
+                               possible.
     --no-continue              do not resume partially downloaded files (restart
     --no-continue              do not resume partially downloaded files (restart
                                from beginning)
                                from beginning)
     --cookies FILE             file to read cookies from and dump cookie jar in
     --cookies FILE             file to read cookies from and dump cookie jar in

+ 1 - 0
setup.py

@@ -11,6 +11,7 @@ try:
     setuptools_available = True
     setuptools_available = True
 except ImportError:
 except ImportError:
     from distutils.core import setup
     from distutils.core import setup
+    setuptools_available = False
 
 
 try:
 try:
     # This will create an exe that needs Microsoft Visual C++ 2008
     # This will create an exe that needs Microsoft Visual C++ 2008

+ 17 - 0
test/helper.py

@@ -5,9 +5,11 @@ import json
 import os.path
 import os.path
 import re
 import re
 import types
 import types
+import sys
 
 
 import youtube_dl.extractor
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
 from youtube_dl import YoutubeDL
+from youtube_dl.utils import preferredencoding
 
 
 
 
 def global_setup():
 def global_setup():
@@ -33,6 +35,21 @@ def try_rm(filename):
             raise
             raise
 
 
 
 
+def report_warning(message):
+    '''
+    Print the message to stderr, it will be prefixed with 'WARNING:'
+    If stderr is a tty file the 'WARNING:' will be colored
+    '''
+    if sys.stderr.isatty() and os.name != 'nt':
+        _msg_header = u'\033[0;33mWARNING:\033[0m'
+    else:
+        _msg_header = u'WARNING:'
+    output = u'%s %s\n' % (_msg_header, message)
+    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
+        output = output.encode(preferredencoding())
+    sys.stderr.write(output)
+
+
 class FakeYDL(YoutubeDL):
 class FakeYDL(YoutubeDL):
     def __init__(self, override=None):
     def __init__(self, override=None):
         # Different instances of the downloader can't share the same dictionary
         # Different instances of the downloader can't share the same dictionary

+ 16 - 4
test/test_YoutubeDL.py

@@ -62,10 +62,10 @@ class TestFormatSelection(unittest.TestCase):
 
 
     def test_format_limit(self):
     def test_format_limit(self):
         formats = [
         formats = [
-            {u'format_id': u'meh'},
-            {u'format_id': u'good'},
-            {u'format_id': u'great'},
-            {u'format_id': u'excellent'},
+            {u'format_id': u'meh', u'url': u'http://example.com/meh'},
+            {u'format_id': u'good', u'url': u'http://example.com/good'},
+            {u'format_id': u'great', u'url': u'http://example.com/great'},
+            {u'format_id': u'excellent', u'url': u'http://example.com/exc'},
         ]
         ]
         info_dict = {
         info_dict = {
             u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
             u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
@@ -128,6 +128,18 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], u'35')
         self.assertEqual(downloaded['format_id'], u'35')
 
 
+    def test_add_extra_info(self):
+        test_dict = {
+            'extractor': 'Foo',
+        }
+        extra_info = {
+            'extractor': 'Bar',
+            'playlist': 'funny videos',
+        }
+        YDL.add_extra_info(test_dict, extra_info)
+        self.assertEqual(test_dict['extractor'], 'Foo')
+        self.assertEqual(test_dict['playlist'], 'funny videos')
+
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
     unittest.main()
     unittest.main()

+ 0 - 70
test/test_dailymotion_subtitles.py

@@ -1,70 +0,0 @@
-#!/usr/bin/env python
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import FakeYDL, global_setup, md5
-global_setup()
-
-
-from youtube_dl.extractor import DailymotionIE
-
-class TestDailymotionSubtitles(unittest.TestCase):
-    def setUp(self):
-        self.DL = FakeYDL()
-        self.url = 'http://www.dailymotion.com/video/xczg00'
-    def getInfoDict(self):
-        IE = DailymotionIE(self.DL)
-        info_dict = IE.extract(self.url)
-        return info_dict
-    def getSubtitles(self):
-        info_dict = self.getInfoDict()
-        return info_dict[0]['subtitles']
-    def test_no_writesubtitles(self):
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-    def test_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
-    def test_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['fr']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
-    def test_allsubtitles(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles.keys()), 5)
-    def test_list_subtitles(self):
-        self.DL.expect_warning(u'Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-    def test_automatic_captions(self):
-        self.DL.expect_warning(u'Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
-    def test_nosubtitles(self):
-        self.DL.expect_warning(u'video doesn\'t have subtitles')
-        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-    def test_multiple_langs(self):
-        self.DL.params['writesubtitles'] = True
-        langs = ['es', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
-
-if __name__ == '__main__':
-    unittest.main()

+ 52 - 23
test/test_download.py

@@ -6,7 +6,14 @@ import sys
 import unittest
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
-from test.helper import get_params, get_testcases, global_setup, try_rm, md5
+from test.helper import (
+    get_params,
+    get_testcases,
+    global_setup,
+    try_rm,
+    md5,
+    report_warning
+)
 global_setup()
 global_setup()
 
 
 
 
@@ -19,10 +26,12 @@ import youtube_dl.YoutubeDL
 from youtube_dl.utils import (
 from youtube_dl.utils import (
     compat_str,
     compat_str,
     compat_urllib_error,
     compat_urllib_error,
+    compat_HTTPError,
     DownloadError,
     DownloadError,
     ExtractorError,
     ExtractorError,
     UnavailableVideoError,
     UnavailableVideoError,
 )
 )
+from youtube_dl.extractor import get_info_extractor
 
 
 RETRIES = 3
 RETRIES = 3
 
 
@@ -55,17 +64,25 @@ def generator(test_case):
 
 
     def test_template(self):
     def test_template(self):
         ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
         ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
+        other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
         def print_skipping(reason):
         def print_skipping(reason):
             print('Skipping %s: %s' % (test_case['name'], reason))
             print('Skipping %s: %s' % (test_case['name'], reason))
-        if not ie._WORKING:
+        if not ie.working():
             print_skipping('IE marked as not _WORKING')
             print_skipping('IE marked as not _WORKING')
             return
             return
-        if 'playlist' not in test_case and not test_case['file']:
-            print_skipping('No output file specified')
-            return
+        if 'playlist' not in test_case:
+            info_dict = test_case.get('info_dict', {})
+            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+                print_skipping('The output file cannot be know, the "file" '
+                    'key is missing or the info_dict is incomplete')
+                return
         if 'skip' in test_case:
         if 'skip' in test_case:
             print_skipping(test_case['skip'])
             print_skipping(test_case['skip'])
             return
             return
+        for other_ie in other_ies:
+            if not other_ie.working():
+                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                return
 
 
         params = get_params(test_case.get('params', {}))
         params = get_params(test_case.get('params', {}))
 
 
@@ -77,35 +94,47 @@ def generator(test_case):
                 finished_hook_called.add(status['filename'])
                 finished_hook_called.add(status['filename'])
         ydl.fd.add_progress_hook(_hook)
         ydl.fd.add_progress_hook(_hook)
 
 
+        def get_tc_filename(tc):
+            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
+
         test_cases = test_case.get('playlist', [test_case])
         test_cases = test_case.get('playlist', [test_case])
-        for tc in test_cases:
-            try_rm(tc['file'])
-            try_rm(tc['file'] + '.part')
-            try_rm(tc['file'] + '.info.json')
+        def try_rm_tcs_files():
+            for tc in test_cases:
+                tc_filename = get_tc_filename(tc)
+                try_rm(tc_filename)
+                try_rm(tc_filename + '.part')
+                try_rm(tc_filename + '.info.json')
+        try_rm_tcs_files()
         try:
         try:
-            for retry in range(1, RETRIES + 1):
+            try_num = 1
+            while True:
                 try:
                 try:
                     ydl.download([test_case['url']])
                     ydl.download([test_case['url']])
                 except (DownloadError, ExtractorError) as err:
                 except (DownloadError, ExtractorError) as err:
-                    if retry == RETRIES: raise
-
                     # Check if the exception is not a network related one
                     # Check if the exception is not a network related one
-                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
                         raise
                         raise
 
 
-                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
+                    if try_num == RETRIES:
+                        report_warning(u'Failed due to network errors, skipping...')
+                        return
+
+                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
+
+                    try_num += 1
                 else:
                 else:
                     break
                     break
 
 
             for tc in test_cases:
             for tc in test_cases:
+                tc_filename = get_tc_filename(tc)
                 if not test_case.get('params', {}).get('skip_download', False):
                 if not test_case.get('params', {}).get('skip_download', False):
-                    self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
-                    self.assertTrue(tc['file'] in finished_hook_called)
-                self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
+                    self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
+                    self.assertTrue(tc_filename in finished_hook_called)
+                self.assertTrue(os.path.exists(tc_filename + '.info.json'))
                 if 'md5' in tc:
                 if 'md5' in tc:
-                    md5_for_file = _file_md5(tc['file'])
+                    md5_for_file = _file_md5(tc_filename)
                     self.assertEqual(md5_for_file, tc['md5'])
                     self.assertEqual(md5_for_file, tc['md5'])
-                with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
+                with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
                     info_dict = json.load(infof)
                     info_dict = json.load(infof)
                 for (info_field, expected) in tc.get('info_dict', {}).items():
                 for (info_field, expected) in tc.get('info_dict', {}).items():
                     if isinstance(expected, compat_str) and expected.startswith('md5:'):
                     if isinstance(expected, compat_str) and expected.startswith('md5:'):
@@ -125,11 +154,11 @@ def generator(test_case):
                 # Check for the presence of mandatory fields
                 # Check for the presence of mandatory fields
                 for key in ('id', 'url', 'title', 'ext'):
                 for key in ('id', 'url', 'title', 'ext'):
                     self.assertTrue(key in info_dict.keys() and info_dict[key])
                     self.assertTrue(key in info_dict.keys() and info_dict[key])
+                # Check for mandatory fields that are automatically set by YoutubeDL
+                for key in ['webpage_url', 'extractor', 'extractor_key']:
+                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
         finally:
         finally:
-            for tc in test_cases:
-                try_rm(tc['file'])
-                try_rm(tc['file'] + '.part')
-                try_rm(tc['file'] + '.info.json')
+            try_rm_tcs_files()
 
 
     return test_template
     return test_template
 
 

+ 18 - 0
test/test_playlists.py

@@ -17,9 +17,11 @@ from youtube_dl.extractor import (
     DailymotionUserIE,
     DailymotionUserIE,
     VimeoChannelIE,
     VimeoChannelIE,
     UstreamChannelIE,
     UstreamChannelIE,
+    SoundcloudSetIE,
     SoundcloudUserIE,
     SoundcloudUserIE,
     LivestreamIE,
     LivestreamIE,
     NHLVideocenterIE,
     NHLVideocenterIE,
+    BambuserChannelIE,
 )
 )
 
 
 
 
@@ -60,6 +62,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], u'5124905')
         self.assertEqual(result['id'], u'5124905')
         self.assertTrue(len(result['entries']) >= 11)
         self.assertTrue(len(result['entries']) >= 11)
 
 
+    def test_soundcloud_set(self):
+        dl = FakeYDL()
+        ie = SoundcloudSetIE(dl)
+        result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'The Royal Concept EP')
+        self.assertTrue(len(result['entries']) >= 6)
+
     def test_soundcloud_user(self):
     def test_soundcloud_user(self):
         dl = FakeYDL()
         dl = FakeYDL()
         ie = SoundcloudUserIE(dl)
         ie = SoundcloudUserIE(dl)
@@ -85,5 +95,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Highlights')
         self.assertEqual(result['title'], u'Highlights')
         self.assertEqual(len(result['entries']), 12)
         self.assertEqual(len(result['entries']), 12)
 
 
+    def test_bambuser_channel(self):
+        dl = FakeYDL()
+        ie = BambuserChannelIE(dl)
+        result = ie.extract('http://bambuser.com/channel/pixelversity')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'pixelversity')
+        self.assertTrue(len(result['entries']) >= 66)
+
 if __name__ == '__main__':
 if __name__ == '__main__':
     unittest.main()
     unittest.main()

+ 211 - 0
test/test_subtitles.py

@@ -0,0 +1,211 @@
+#!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, global_setup, md5
+global_setup()
+
+
+from youtube_dl.extractor import (
+    YoutubeIE,
+    DailymotionIE,
+    TEDIE,
+)
+
+
+class BaseTestSubtitles(unittest.TestCase):
+    url = None
+    IE = None
+    def setUp(self):
+        self.DL = FakeYDL()
+        self.ie = self.IE(self.DL)
+
+    def getInfoDict(self):
+        info_dict = self.ie.extract(self.url)
+        return info_dict
+
+    def getSubtitles(self):
+        info_dict = self.getInfoDict()
+        return info_dict['subtitles']
+
+
+class TestYoutubeSubtitles(BaseTestSubtitles):
+    url = 'QRS8MkLhQmM'
+    IE = YoutubeIE
+
+    def getSubtitles(self):
+        info_dict = self.getInfoDict()
+        return info_dict[0]['subtitles']
+
+    def test_youtube_no_writesubtitles(self):
+        self.DL.params['writesubtitles'] = False
+        subtitles = self.getSubtitles()
+        self.assertEqual(subtitles, None)
+
+    def test_youtube_subtitles(self):
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+
+    def test_youtube_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+
+    def test_youtube_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles.keys()), 13)
+
+    def test_youtube_subtitles_sbv_format(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'sbv'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
+
+    def test_youtube_subtitles_vtt_format(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'vtt'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
+
+    def test_youtube_list_subtitles(self):
+        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_youtube_automatic_captions(self):
+        self.url = '8YoUxe5ncPo'
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertTrue(subtitles['it'] is not None)
+
+    def test_youtube_nosubtitles(self):
+        self.DL.expect_warning(u'video doesn\'t have subtitles')
+        self.url = 'sAjKT8FhjI8'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+
+    def test_youtube_multiple_langs(self):
+        self.url = 'QRS8MkLhQmM'
+        self.DL.params['writesubtitles'] = True
+        langs = ['it', 'fr', 'de']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
+
+
+class TestDailymotionSubtitles(BaseTestSubtitles):
+    url = 'http://www.dailymotion.com/video/xczg00'
+    IE = DailymotionIE
+
+    def test_no_writesubtitles(self):
+        subtitles = self.getSubtitles()
+        self.assertEqual(subtitles, None)
+
+    def test_subtitles(self):
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+
+    def test_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['fr']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles.keys()), 5)
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_automatic_captions(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslang'] = ['en']
+        subtitles = self.getSubtitles()
+        self.assertTrue(len(subtitles.keys()) == 0)
+
+    def test_nosubtitles(self):
+        self.DL.expect_warning(u'video doesn\'t have subtitles')
+        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+
+    def test_multiple_langs(self):
+        self.DL.params['writesubtitles'] = True
+        langs = ['es', 'fr', 'de']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
+
+
+class TestTedSubtitles(BaseTestSubtitles):
+    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
+    IE = TEDIE
+
+    def test_no_writesubtitles(self):
+        subtitles = self.getSubtitles()
+        self.assertEqual(subtitles, None)
+
+    def test_subtitles(self):
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
+
+    def test_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['fr']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles.keys()), 28)
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_automatic_captions(self):
+        self.DL.expect_warning(u'Automatic Captions not supported by this server')
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslang'] = ['en']
+        subtitles = self.getSubtitles()
+        self.assertTrue(len(subtitles.keys()) == 0)
+
+    def test_multiple_langs(self):
+        self.DL.params['writesubtitles'] = True
+        langs = ['es', 'fr', 'de']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
+
+if __name__ == '__main__':
+    unittest.main()

+ 0 - 95
test/test_youtube_subtitles.py

@@ -1,95 +0,0 @@
-#!/usr/bin/env python
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import FakeYDL, global_setup, md5
-global_setup()
-
-
-from youtube_dl.extractor import YoutubeIE
-
-
-class TestYoutubeSubtitles(unittest.TestCase):
-    def setUp(self):
-        self.DL = FakeYDL()
-        self.url = 'QRS8MkLhQmM'
-
-    def getInfoDict(self):
-        IE = YoutubeIE(self.DL)
-        info_dict = IE.extract(self.url)
-        return info_dict
-
-    def getSubtitles(self):
-        info_dict = self.getInfoDict()
-        return info_dict[0]['subtitles']
-
-    def test_youtube_no_writesubtitles(self):
-        self.DL.params['writesubtitles'] = False
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_youtube_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
-
-    def test_youtube_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
-
-    def test_youtube_allsubtitles(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles.keys()), 13)
-
-    def test_youtube_subtitles_sbv_format(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitlesformat'] = 'sbv'
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
-
-    def test_youtube_subtitles_vtt_format(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitlesformat'] = 'vtt'
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
-
-    def test_youtube_list_subtitles(self):
-        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
-    def test_youtube_automatic_captions(self):
-        self.url = '8YoUxe5ncPo'
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertTrue(subtitles['it'] is not None)
-
-    def test_youtube_nosubtitles(self):
-        self.DL.expect_warning(u'video doesn\'t have subtitles')
-        self.url = 'sAjKT8FhjI8'
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-
-    def test_youtube_multiple_langs(self):
-        self.url = 'QRS8MkLhQmM'
-        self.DL.params['writesubtitles'] = True
-        langs = ['it', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
-
-if __name__ == '__main__':
-    unittest.main()

+ 42 - 33
youtube_dl/FileDownloader.py

@@ -4,12 +4,16 @@ import re
 import subprocess
 import subprocess
 import sys
 import sys
 import time
 import time
-import traceback
 
 
-if os.name == 'nt':
-    import ctypes
-
-from .utils import *
+from .utils import (
+    compat_urllib_error,
+    compat_urllib_request,
+    ContentTooShortError,
+    determine_ext,
+    encodeFilename,
+    sanitize_open,
+    timeconvert,
+)
 
 
 
 
 class FileDownloader(object):
 class FileDownloader(object):
@@ -144,16 +148,8 @@ class FileDownloader(object):
     def to_stderr(self, message):
     def to_stderr(self, message):
         self.ydl.to_screen(message)
         self.ydl.to_screen(message)
 
 
-    def to_cons_title(self, message):
-        """Set console/terminal window title to message."""
-        if not self.params.get('consoletitle', False):
-            return
-        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
-            # c_wchar_p() might not be necessary if `message` is
-            # already of type unicode()
-            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
-        elif 'TERM' in os.environ:
-            self.to_screen('\033]0;%s\007' % message, skip_eol=True)
+    def to_console_title(self, message):
+        self.ydl.to_console_title(message)
 
 
     def trouble(self, *args, **kargs):
     def trouble(self, *args, **kargs):
         self.ydl.trouble(*args, **kargs)
         self.ydl.trouble(*args, **kargs)
@@ -194,7 +190,7 @@ class FileDownloader(object):
             if old_filename == new_filename:
             if old_filename == new_filename:
                 return
                 return
             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
             self.report_error(u'unable to rename file')
             self.report_error(u'unable to rename file')
 
 
     def try_utime(self, filename, last_modified_hdr):
     def try_utime(self, filename, last_modified_hdr):
@@ -227,8 +223,14 @@ class FileDownloader(object):
         if self.params.get('noprogress', False):
         if self.params.get('noprogress', False):
             return
             return
         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
-        eta_str = self.format_eta(eta)
-        percent_str = self.format_percent(percent)
+        if eta is not None:
+            eta_str = self.format_eta(eta)
+        else:
+            eta_str = 'Unknown ETA'
+        if percent is not None:
+            percent_str = self.format_percent(percent)
+        else:
+            percent_str = 'Unknown %'
         speed_str = self.format_speed(speed)
         speed_str = self.format_speed(speed)
         if self.params.get('progress_with_newline', False):
         if self.params.get('progress_with_newline', False):
             self.to_screen(u'[download] %s of %s at %s ETA %s' %
             self.to_screen(u'[download] %s of %s at %s ETA %s' %
@@ -236,7 +238,7 @@ class FileDownloader(object):
         else:
         else:
             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
-        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
+        self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 
 
     def report_resuming_byte(self, resume_len):
     def report_resuming_byte(self, resume_len):
@@ -251,7 +253,7 @@ class FileDownloader(object):
         """Report file has already been fully downloaded."""
         """Report file has already been fully downloaded."""
         try:
         try:
             self.to_screen(u'[download] %s has already been downloaded' % file_name)
             self.to_screen(u'[download] %s has already been downloaded' % file_name)
-        except (UnicodeEncodeError) as err:
+        except UnicodeEncodeError:
             self.to_screen(u'[download] The file has already been downloaded')
             self.to_screen(u'[download] The file has already been downloaded')
 
 
     def report_unable_to_resume(self):
     def report_unable_to_resume(self):
@@ -267,7 +269,7 @@ class FileDownloader(object):
             self.to_screen(u'\r%s[download] 100%% of %s in %s' %
             self.to_screen(u'\r%s[download] 100%% of %s in %s' %
                 (clear_line, data_len_str, self.format_seconds(tot_time)))
                 (clear_line, data_len_str, self.format_seconds(tot_time)))
 
 
-    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
         def run_rtmpdump(args):
         def run_rtmpdump(args):
             start = time.time()
             start = time.time()
             resume_percent = None
             resume_percent = None
@@ -348,6 +350,8 @@ class FileDownloader(object):
             basic_args += ['--tcUrl', url]
             basic_args += ['--tcUrl', url]
         if test:
         if test:
             basic_args += ['--stop', '1']
             basic_args += ['--stop', '1']
+        if live:
+            basic_args += ['--live']
         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
         if self.params.get('verbose', False):
             try:
             try:
@@ -422,15 +426,20 @@ class FileDownloader(object):
         self.report_destination(filename)
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
         tmpfilename = self.temp_name(filename)
 
 
-        args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
-        # Check for ffmpeg first
-        try:
-            subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
-        except (OSError, IOError):
-            self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
-            return False
+        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
+            '-bsf:a', 'aac_adtstoasc', tmpfilename]
 
 
-        retval = subprocess.call(args)
+        for program in ['avconv', 'ffmpeg']:
+            try:
+                subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+                break
+            except (OSError, IOError):
+                pass
+        else:
+            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
+        cmd = [program] + args
+
+        retval = subprocess.call(cmd)
         if retval == 0:
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
             fsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
             self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
@@ -467,7 +476,8 @@ class FileDownloader(object):
                                                 info_dict.get('player_url', None),
                                                 info_dict.get('player_url', None),
                                                 info_dict.get('page_url', None),
                                                 info_dict.get('page_url', None),
                                                 info_dict.get('play_path', None),
                                                 info_dict.get('play_path', None),
-                                                info_dict.get('tc_url', None))
+                                                info_dict.get('tc_url', None),
+                                                info_dict.get('rtmp_live', False))
 
 
         # Attempt to download using mplayer
         # Attempt to download using mplayer
         if url.startswith('mms') or url.startswith('rtsp'):
         if url.startswith('mms') or url.startswith('rtsp'):
@@ -606,12 +616,11 @@ class FileDownloader(object):
             # Progress message
             # Progress message
             speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
             speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
             if data_len is None:
             if data_len is None:
-                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
-                eta = None
+                eta = percent = None
             else:
             else:
                 percent = self.calc_percent(byte_counter, data_len)
                 percent = self.calc_percent(byte_counter, data_len)
                 eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                 eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
-                self.report_progress(percent, data_len_str, speed, eta)
+            self.report_progress(percent, data_len_str, speed, eta)
 
 
             self._hook_progress({
             self._hook_progress({
                 'downloaded_bytes': byte_counter,
                 'downloaded_bytes': byte_counter,

+ 1 - 1
youtube_dl/PostProcessor.py

@@ -501,7 +501,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
 
 
         options = ['-c', 'copy']
         options = ['-c', 'copy']
         for (name, value) in metadata.items():
         for (name, value) in metadata.items():
-            options.extend(['-metadata', '%s="%s"' % (name, value)])
+            options.extend(['-metadata', '%s=%s' % (name, value)])
         options.extend(['-f', ext])
         options.extend(['-f', ext])
 
 
         self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
         self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)

+ 126 - 32
youtube_dl/YoutubeDL.py

@@ -13,7 +13,34 @@ import sys
 import time
 import time
 import traceback
 import traceback
 
 
-from .utils import *
+if os.name == 'nt':
+    import ctypes
+
+from .utils import (
+    compat_http_client,
+    compat_print,
+    compat_str,
+    compat_urllib_error,
+    compat_urllib_request,
+    ContentTooShortError,
+    date_from_str,
+    DateRange,
+    determine_ext,
+    DownloadError,
+    encodeFilename,
+    ExtractorError,
+    locked_file,
+    MaxDownloadsReached,
+    PostProcessingError,
+    preferredencoding,
+    SameFileError,
+    sanitize_filename,
+    subtitles_filename,
+    takewhile_inclusive,
+    UnavailableVideoError,
+    write_json_file,
+    write_string,
+)
 from .extractor import get_info_extractor, gen_extractors
 from .extractor import get_info_extractor, gen_extractors
 from .FileDownloader import FileDownloader
 from .FileDownloader import FileDownloader
 
 
@@ -176,6 +203,35 @@ class YoutubeDL(object):
             output = output.encode(preferredencoding())
             output = output.encode(preferredencoding())
         sys.stderr.write(output)
         sys.stderr.write(output)
 
 
+    def to_console_title(self, message):
+        if not self.params.get('consoletitle', False):
+            return
+        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+            # c_wchar_p() might not be necessary if `message` is
+            # already of type unicode()
+            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+        elif 'TERM' in os.environ:
+            write_string(u'\033]0;%s\007' % message, self._screen_file)
+
+    def save_console_title(self):
+        if not self.params.get('consoletitle', False):
+            return
+        if 'TERM' in os.environ:
+            write_string(u'\033[22t', self._screen_file)
+
+    def restore_console_title(self):
+        if not self.params.get('consoletitle', False):
+            return
+        if 'TERM' in os.environ:
+            write_string(u'\033[23t', self._screen_file)
+
+    def __enter__(self):
+        self.save_console_title()
+        return self
+
+    def __exit__(self, *args):
+        self.restore_console_title()
+
     def fixed_template(self):
     def fixed_template(self):
         """Checks if the output template is fixed."""
         """Checks if the output template is fixed."""
         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
@@ -254,7 +310,7 @@ class YoutubeDL(object):
         """Report file has already been fully downloaded."""
         """Report file has already been fully downloaded."""
         try:
         try:
             self.to_screen(u'[download] %s has already been downloaded' % file_name)
             self.to_screen(u'[download] %s has already been downloaded' % file_name)
-        except (UnicodeEncodeError) as err:
+        except UnicodeEncodeError:
             self.to_screen(u'[download] The file has already been downloaded')
             self.to_screen(u'[download] The file has already been downloaded')
 
 
     def increment_downloads(self):
     def increment_downloads(self):
@@ -272,7 +328,7 @@ class YoutubeDL(object):
                 autonumber_size = 5
                 autonumber_size = 5
             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
             template_dict['autonumber'] = autonumber_templ % self._num_downloads
             template_dict['autonumber'] = autonumber_templ % self._num_downloads
-            if template_dict['playlist_index'] is not None:
+            if template_dict.get('playlist_index') is not None:
                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 
 
             sanitize = lambda k, v: sanitize_filename(
             sanitize = lambda k, v: sanitize_filename(
@@ -318,6 +374,12 @@ class YoutubeDL(object):
                     % info_dict)
                     % info_dict)
         return None
         return None
 
 
+    @staticmethod
+    def add_extra_info(info_dict, extra_info):
+        '''Set the keys from extra_info in info dict if they are missing'''
+        for key, value in extra_info.items():
+            info_dict.setdefault(key, value)
+
     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
         '''
         '''
         Returns a list with a dictionary for each video we find.
         Returns a list with a dictionary for each video we find.
@@ -344,17 +406,17 @@ class YoutubeDL(object):
                     break
                     break
                 if isinstance(ie_result, list):
                 if isinstance(ie_result, list):
                     # Backwards compatibility: old IE result format
                     # Backwards compatibility: old IE result format
-                    for result in ie_result:
-                        result.update(extra_info)
                     ie_result = {
                     ie_result = {
                         '_type': 'compat_list',
                         '_type': 'compat_list',
                         'entries': ie_result,
                         'entries': ie_result,
                     }
                     }
-                else:
-                    ie_result.update(extra_info)
-                if 'extractor' not in ie_result:
-                    ie_result['extractor'] = ie.IE_NAME
-                return self.process_ie_result(ie_result, download=download)
+                self.add_extra_info(ie_result,
+                    {
+                        'extractor': ie.IE_NAME,
+                        'webpage_url': url,
+                        'extractor_key': ie.ie_key(),
+                    })
+                return self.process_ie_result(ie_result, download, extra_info)
             except ExtractorError as de: # An error we somewhat expected
             except ExtractorError as de: # An error we somewhat expected
                 self.report_error(compat_str(de), de.format_traceback())
                 self.report_error(compat_str(de), de.format_traceback())
                 break
                 break
@@ -378,8 +440,8 @@ class YoutubeDL(object):
 
 
         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
         if result_type == 'video':
         if result_type == 'video':
-            ie_result.update(extra_info)
-            return self.process_video_result(ie_result)
+            self.add_extra_info(ie_result, extra_info)
+            return self.process_video_result(ie_result, download=download)
         elif result_type == 'url':
         elif result_type == 'url':
             # We have to add extra_info to the results because it may be
             # We have to add extra_info to the results because it may be
             # contained in a playlist
             # contained in a playlist
@@ -388,6 +450,7 @@ class YoutubeDL(object):
                                      ie_key=ie_result.get('ie_key'),
                                      ie_key=ie_result.get('ie_key'),
                                      extra_info=extra_info)
                                      extra_info=extra_info)
         elif result_type == 'playlist':
         elif result_type == 'playlist':
+            self.add_extra_info(ie_result, extra_info)
             # We process each entry in the playlist
             # We process each entry in the playlist
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -413,12 +476,10 @@ class YoutubeDL(object):
                 extra = {
                 extra = {
                     'playlist': playlist,
                     'playlist': playlist,
                     'playlist_index': i + playliststart,
                     'playlist_index': i + playliststart,
+                    'extractor': ie_result['extractor'],
+                    'webpage_url': ie_result['webpage_url'],
+                    'extractor_key': ie_result['extractor_key'],
                 }
                 }
-                if not 'extractor' in entry:
-                    # We set the extractor, if it's an url it will be set then to
-                    # the new extractor, but if it's already a video we must make
-                    # sure it's present: see issue #877
-                    entry['extractor'] = ie_result['extractor']
                 entry_result = self.process_ie_result(entry,
                 entry_result = self.process_ie_result(entry,
                                                       download=download,
                                                       download=download,
                                                       extra_info=extra)
                                                       extra_info=extra)
@@ -427,10 +488,15 @@ class YoutubeDL(object):
             return ie_result
             return ie_result
         elif result_type == 'compat_list':
         elif result_type == 'compat_list':
             def _fixup(r):
             def _fixup(r):
-                r.setdefault('extractor', ie_result['extractor'])
+                self.add_extra_info(r,
+                    {
+                        'extractor': ie_result['extractor'],
+                        'webpage_url': ie_result['webpage_url'],
+                        'extractor_key': ie_result['extractor_key'],
+                    })
                 return r
                 return r
             ie_result['entries'] = [
             ie_result['entries'] = [
-                self.process_ie_result(_fixup(r), download=download)
+                self.process_ie_result(_fixup(r), download, extra_info)
                 for r in ie_result['entries']
                 for r in ie_result['entries']
             ]
             ]
             return ie_result
             return ie_result
@@ -482,7 +548,7 @@ class YoutubeDL(object):
                 format['format'] = u'{id} - {res}{note}'.format(
                 format['format'] = u'{id} - {res}{note}'.format(
                     id=format['format_id'],
                     id=format['format_id'],
                     res=self.format_resolution(format),
                     res=self.format_resolution(format),
-                    note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '',
+                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                 )
                 )
             # Automatically determine file extension if missing
             # Automatically determine file extension if missing
             if 'ext' not in format:
             if 'ext' not in format:
@@ -630,7 +696,7 @@ class YoutubeDL(object):
             # subtitles download errors are already managed as troubles in relevant IE
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['subtitles']
             subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat')
+            sub_format = self.params.get('subtitlesformat', 'srt')
             for sub_lang in subtitles.keys():
             for sub_lang in subtitles.keys():
                 sub = subtitles[sub_lang]
                 sub = subtitles[sub_lang]
                 if sub is None:
                 if sub is None:
@@ -759,6 +825,8 @@ class YoutubeDL(object):
 
 
     @staticmethod
     @staticmethod
     def format_resolution(format, default='unknown'):
     def format_resolution(format, default='unknown'):
+        if format.get('_resolution') is not None:
+            return format['_resolution']
         if format.get('height') is not None:
         if format.get('height') is not None:
             if format.get('width') is not None:
             if format.get('width') is not None:
                 res = u'%sx%s' % (format['width'], format['height'])
                 res = u'%sx%s' % (format['width'], format['height'])
@@ -769,19 +837,45 @@ class YoutubeDL(object):
         return res
         return res
 
 
     def list_formats(self, info_dict):
     def list_formats(self, info_dict):
-        formats_s = []
-        for format in info_dict.get('formats', [info_dict]):
-            formats_s.append(u'%-15s%-7s     %-15s%s' % (
+        def format_note(fdict):
+            if fdict.get('format_note') is not None:
+                return fdict['format_note']
+            res = u''
+            if fdict.get('vcodec') is not None:
+                res += u'%-5s' % fdict['vcodec']
+            elif fdict.get('vbr') is not None:
+                res += u'video'
+            if fdict.get('vbr') is not None:
+                res += u'@%4dk' % fdict['vbr']
+            if fdict.get('acodec') is not None:
+                if res:
+                    res += u', '
+                res += u'%-5s' % fdict['acodec']
+            elif fdict.get('abr') is not None:
+                if res:
+                    res += u', '
+                res += 'audio'
+            if fdict.get('abr') is not None:
+                res += u'@%3dk' % fdict['abr']
+            return res
+
+        def line(format):
+            return (u'%-20s%-10s%-12s%s' % (
                 format['format_id'],
                 format['format_id'],
                 format['ext'],
                 format['ext'],
-                format.get('format_note', ''),
                 self.format_resolution(format),
                 self.format_resolution(format),
+                format_note(format),
                 )
                 )
             )
             )
-        if len(formats_s) != 1:
-            formats_s[0] += ' (worst)'
-            formats_s[-1] += ' (best)'
-        formats_s = "\n".join(formats_s)
-        self.to_screen(u'[info] Available formats for %s:\n'
-            u'format code    extension   note           resolution\n%s' % (
-                info_dict['id'], formats_s))
+
+        formats = info_dict.get('formats', [info_dict])
+        formats_s = list(map(line, formats))
+        if len(formats) > 1:
+            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
+            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
+
+        header_line = line({
+            'format_id': u'format code', 'ext': u'extension',
+            '_resolution': u'resolution', 'format_note': u'note'})
+        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
+                       (info_dict['id'], header_line, u"\n".join(formats_s)))

+ 58 - 55
youtube_dl/__init__.py

@@ -32,6 +32,8 @@ __authors__  = (
     'Ismael Mejía',
     'Ismael Mejía',
     'Steffan \'Ruirize\' James',
     'Steffan \'Ruirize\' James',
     'Andras Elso',
     'Andras Elso',
+    'Jelle van der Waa',
+    'Marcin Cieślak',
 )
 )
 
 
 __license__ = 'Public Domain'
 __license__ = 'Public Domain'
@@ -349,7 +351,7 @@ def parseOpts(overrideArguments=None):
                   'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
                   'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
     filesystem.add_option('--autonumber-size',
     filesystem.add_option('--autonumber-size',
             dest='autonumber_size', metavar='NUMBER',
             dest='autonumber_size', metavar='NUMBER',
-            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
+            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
     filesystem.add_option('--restrict-filenames',
     filesystem.add_option('--restrict-filenames',
             action='store_true', dest='restrictfilenames',
             action='store_true', dest='restrictfilenames',
             help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
             help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
@@ -358,7 +360,7 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option('-w', '--no-overwrites',
     filesystem.add_option('-w', '--no-overwrites',
             action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
             action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
     filesystem.add_option('-c', '--continue',
     filesystem.add_option('-c', '--continue',
-            action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
+            action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
     filesystem.add_option('--no-continue',
     filesystem.add_option('--no-continue',
             action='store_false', dest='continue_dl',
             action='store_false', dest='continue_dl',
             help='do not resume partially downloaded files (restart from beginning)')
             help='do not resume partially downloaded files (restart from beginning)')
@@ -601,8 +603,7 @@ def _real_main(argv=None):
                      u' file! Use "%%(ext)s" instead of %r' %
                      u' file! Use "%%(ext)s" instead of %r' %
                      determine_ext(outtmpl, u''))
                      determine_ext(outtmpl, u''))
 
 
-    # YoutubeDL
-    ydl = YoutubeDL({
+    ydl_opts = {
         'usenetrc': opts.usenetrc,
         'usenetrc': opts.usenetrc,
         'username': opts.username,
         'username': opts.username,
         'password': opts.password,
         'password': opts.password,
@@ -665,61 +666,63 @@ def _real_main(argv=None):
         'youtube_print_sig_code': opts.youtube_print_sig_code,
         'youtube_print_sig_code': opts.youtube_print_sig_code,
         'age_limit': opts.age_limit,
         'age_limit': opts.age_limit,
         'download_archive': opts.download_archive,
         'download_archive': opts.download_archive,
-        })
+    }
 
 
-    if opts.verbose:
-        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
-        try:
-            sp = subprocess.Popen(
-                ['git', 'rev-parse', '--short', 'HEAD'],
-                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                cwd=os.path.dirname(os.path.abspath(__file__)))
-            out, err = sp.communicate()
-            out = out.decode().strip()
-            if re.match('[0-9a-f]+', out):
-                write_string(u'[debug] Git HEAD: ' + out + u'\n')
-        except:
+    with YoutubeDL(ydl_opts) as ydl:
+        if opts.verbose:
+            write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
             try:
             try:
-                sys.exc_clear()
+                sp = subprocess.Popen(
+                    ['git', 'rev-parse', '--short', 'HEAD'],
+                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                    cwd=os.path.dirname(os.path.abspath(__file__)))
+                out, err = sp.communicate()
+                out = out.decode().strip()
+                if re.match('[0-9a-f]+', out):
+                    write_string(u'[debug] Git HEAD: ' + out + u'\n')
             except:
             except:
-                pass
-        write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
-
-        proxy_map = {}
-        for handler in opener.handlers:
-            if hasattr(handler, 'proxies'):
-                proxy_map.update(handler.proxies)
-        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
-
-    ydl.add_default_info_extractors()
-
-    # PostProcessors
-    # Add the metadata pp first, the other pps will copy it
-    if opts.addmetadata:
-        ydl.add_post_processor(FFmpegMetadataPP())
-    if opts.extractaudio:
-        ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
-    if opts.recodevideo:
-        ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
-    if opts.embedsubtitles:
-        ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
-
-    # Update version
-    if opts.update_self:
-        update_self(ydl.to_screen, opts.verbose)
-
-    # Maybe do nothing
-    if len(all_urls) < 1:
-        if not opts.update_self:
-            parser.error(u'you must provide at least one URL')
-        else:
-            sys.exit()
+                try:
+                    sys.exc_clear()
+                except:
+                    pass
+            write_string(u'[debug] Python version %s - %s' %
+                         (platform.python_version(), platform_name()) + u'\n')
+
+            proxy_map = {}
+            for handler in opener.handlers:
+                if hasattr(handler, 'proxies'):
+                    proxy_map.update(handler.proxies)
+            write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
+
+        ydl.add_default_info_extractors()
+
+        # PostProcessors
+        # Add the metadata pp first, the other pps will copy it
+        if opts.addmetadata:
+            ydl.add_post_processor(FFmpegMetadataPP())
+        if opts.extractaudio:
+            ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
+        if opts.recodevideo:
+            ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
+        if opts.embedsubtitles:
+            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
+
+        # Update version
+        if opts.update_self:
+            update_self(ydl.to_screen, opts.verbose)
+
+        # Maybe do nothing
+        if len(all_urls) < 1:
+            if not opts.update_self:
+                parser.error(u'you must provide at least one URL')
+            else:
+                sys.exit()
 
 
-    try:
-        retcode = ydl.download(all_urls)
-    except MaxDownloadsReached:
-        ydl.to_screen(u'--max-download limit reached, aborting.')
-        retcode = 101
+        try:
+            retcode = ydl.download(all_urls)
+        except MaxDownloadsReached:
+            ydl.to_screen(u'--max-download limit reached, aborting.')
+            retcode = 101
 
 
     # Dump cookie jar if requested
     # Dump cookie jar if requested
     if opts.cookiefile is not None:
     if opts.cookiefile is not None:

+ 16 - 2
youtube_dl/extractor/__init__.py

@@ -9,6 +9,7 @@ from .arte import (
     ArteTVFutureIE,
     ArteTVFutureIE,
 )
 )
 from .auengine import AUEngineIE
 from .auengine import AUEngineIE
+from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .bloomberg import BloombergIE
@@ -37,8 +38,10 @@ from .defense import DefenseGouvFrIE
 from .ebaumsworld import EbaumsWorldIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eighttracks import EightTracksIE
+from .eitb import EitbIE
 from .escapist import EscapistIE
 from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .exfm import ExfmIE
+from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .facebook import FacebookIE
 from .faz import FazIE
 from .faz import FazIE
 from .fktv import (
 from .fktv import (
@@ -54,6 +57,7 @@ from .francetv import (
 )
 )
 from .freesound import FreesoundIE
 from .freesound import FreesoundIE
 from .funnyordie import FunnyOrDieIE
 from .funnyordie import FunnyOrDieIE
+from .gamekings import GamekingsIE
 from .gamespot import GameSpotIE
 from .gamespot import GameSpotIE
 from .gametrailers import GametrailersIE
 from .gametrailers import GametrailersIE
 from .generic import GenericIE
 from .generic import GenericIE
@@ -76,13 +80,15 @@ from .keezmovies import KeezMoviesIE
 from .kickstarter import KickStarterIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .liveleak import LiveLeakIE
-from .livestream import LivestreamIE
+from .livestream import LivestreamIE, LivestreamOriginalIE
 from .metacafe import MetacafeIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE
 from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mixcloud import MixcloudIE
+from .mofosex import MofosexIE
 from .mtv import MTVIE
 from .mtv import MTVIE
 from .muzu import MuzuTVIE
 from .muzu import MuzuTVIE
+from .myspace import MySpaceIE
 from .myspass import MySpassIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .naver import NaverIE
@@ -110,7 +116,11 @@ from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .slideshare import SlideshareIE
 from .sohu import SohuIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
-from .southparkstudios import SouthParkStudiosIE
+from .southparkstudios import (
+    SouthParkStudiosIE,
+    SouthparkDeIE,
+)
+from .space import SpaceIE
 from .spankwire import SpankwireIE
 from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .stanfordoc import StanfordOpenClassroomIE
@@ -128,6 +138,7 @@ from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .tutv import TutvIE
+from .tvp import TvpIE
 from .unistra import UnistraIE
 from .unistra import UnistraIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
 from .vbox7 import Vbox7IE
@@ -141,6 +152,7 @@ from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .videopremium import VideoPremiumIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .vine import VineIE
+from .vk import VKIE
 from .wat import WatIE
 from .wat import WatIE
 from .websurg import WeBSurgIE
 from .websurg import WeBSurgIE
 from .weibo import WeiboIE
 from .weibo import WeiboIE
@@ -149,6 +161,7 @@ from .worldstarhiphop import WorldStarHipHopIE
 from .xhamster import XHamsterIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
 from .xvideos import XVideosIE
+from .xtube import XTubeIE
 from .yahoo import YahooIE, YahooSearchIE
 from .yahoo import YahooIE, YahooSearchIE
 from .youjizz import YouJizzIE
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
 from .youku import YoukuIE
@@ -157,6 +170,7 @@ from .youtube import (
     YoutubeIE,
     YoutubeIE,
     YoutubePlaylistIE,
     YoutubePlaylistIE,
     YoutubeSearchIE,
     YoutubeSearchIE,
+    YoutubeSearchDateIE,
     YoutubeUserIE,
     YoutubeUserIE,
     YoutubeChannelIE,
     YoutubeChannelIE,
     YoutubeShowIE,
     YoutubeShowIE,

+ 35 - 17
youtube_dl/extractor/arte.py

@@ -10,6 +10,7 @@ from ..utils import (
     unified_strdate,
     unified_strdate,
     determine_ext,
     determine_ext,
     get_element_by_id,
     get_element_by_id,
+    compat_str,
 )
 )
 
 
 # There are different sources of video in arte.tv, the extraction process 
 # There are different sources of video in arte.tv, the extraction process 
@@ -68,7 +69,7 @@ class ArteTvIE(InfoExtractor):
             lang = mobj.group('lang')
             lang = mobj.group('lang')
             return self._extract_liveweb(url, name, lang)
             return self._extract_liveweb(url, name, lang)
 
 
-        if re.search(self._LIVE_URL, video_id) is not None:
+        if re.search(self._LIVE_URL, url) is not None:
             raise ExtractorError(u'Arte live streams are not yet supported, sorry')
             raise ExtractorError(u'Arte live streams are not yet supported, sorry')
             # self.extractLiveStream(url)
             # self.extractLiveStream(url)
             # return
             # return
@@ -114,7 +115,7 @@ class ArteTvIE(InfoExtractor):
         event_doc = config_doc.find('event')
         event_doc = config_doc.find('event')
         url_node = event_doc.find('video').find('urlHd')
         url_node = event_doc.find('video').find('urlHd')
         if url_node is None:
         if url_node is None:
-            url_node = video_doc.find('urlSd')
+            url_node = event_doc.find('urlSd')
 
 
         return {'id': video_id,
         return {'id': video_id,
                 'title': event_doc.find('name%s' % lang.capitalize()).text,
                 'title': event_doc.find('name%s' % lang.capitalize()).text,
@@ -158,7 +159,9 @@ class ArteTVPlus7IE(InfoExtractor):
             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
         }
         }
 
 
-        formats = player_info['VSR'].values()
+        all_formats = player_info['VSR'].values()
+        # Some formats use the m3u8 protocol
+        all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
         def _match_lang(f):
         def _match_lang(f):
             if f.get('versionCode') is None:
             if f.get('versionCode') is None:
                 return True
                 return True
@@ -170,24 +173,39 @@ class ArteTVPlus7IE(InfoExtractor):
             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
             return any(re.match(r, f['versionCode']) for r in regexes)
             return any(re.match(r, f['versionCode']) for r in regexes)
         # Some formats may not be in the same language as the url
         # Some formats may not be in the same language as the url
-        formats = filter(_match_lang, formats)
-        # Some formats use the m3u8 protocol
-        formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
-        # We order the formats by quality
+        formats = filter(_match_lang, all_formats)
         formats = list(formats) # in python3 filter returns an iterator
         formats = list(formats) # in python3 filter returns an iterator
+        if not formats:
+            # Some videos are only available in the 'Originalversion'
+            # they aren't tagged as being in French or German
+            if all(f['versionCode'] == 'VO' for f in all_formats):
+                formats = all_formats
+            else:
+                raise ExtractorError(u'The formats list is empty')
+
         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
-            sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
+            def sort_key(f):
+                return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
         else:
         else:
-            sort_key = lambda f: int(f.get('height',-1))
+            def sort_key(f):
+                return (
+                    # Sort first by quality
+                    int(f.get('height',-1)),
+                    int(f.get('bitrate',-1)),
+                    # The original version with subtitles has lower relevance
+                    re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
+                    # The version subtitled for the deaf and hard of hearing ("sourds/malentendants") also has lower relevance
+                    re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+                )
         formats = sorted(formats, key=sort_key)
         formats = sorted(formats, key=sort_key)
-        # Prefer videos without subtitles in the same language
-        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
-        # Pick the best quality
         def _format(format_info):
         def _format(format_info):
-            quality = format_info['quality']
-            m_quality = re.match(r'\w*? - (\d*)p', quality)
-            if m_quality is not None:
-                quality = m_quality.group(1)
+            quality = ''
+            height = format_info.get('height')
+            if height is not None:
+                quality = compat_str(height)
+            bitrate = format_info.get('bitrate')
+            if bitrate is not None:
+                quality += '-%d' % bitrate
             if format_info.get('versionCode') is not None:
             if format_info.get('versionCode') is not None:
                 format_id = u'%s-%s' % (quality, format_info['versionCode'])
                 format_id = u'%s-%s' % (quality, format_info['versionCode'])
             else:
             else:
@@ -196,7 +214,7 @@ class ArteTVPlus7IE(InfoExtractor):
                 'format_id': format_id,
                 'format_id': format_id,
                 'format_note': format_info.get('versionLibelle'),
                 'format_note': format_info.get('versionLibelle'),
                 'width': format_info.get('width'),
                 'width': format_info.get('width'),
-                'height': format_info.get('height'),
+                'height': height,
             }
             }
             if format_info['mediaType'] == u'rtmp':
             if format_info['mediaType'] == u'rtmp':
                 info['url'] = format_info['streamer']
                 info['url'] = format_info['streamer']

+ 81 - 0
youtube_dl/extractor/bambuser.py

@@ -0,0 +1,81 @@
+import re
+import json
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+)
+
+
+class BambuserIE(InfoExtractor):
+    IE_NAME = u'bambuser'
+    _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
+    _API_KEY = '005f64509e19a868399060af746a00aa'
+
+    _TEST = {
+        u'url': u'http://bambuser.com/v/4050584',
+        # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
+        #u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
+        u'info_dict': {
+            u'id': u'4050584',
+            u'ext': u'flv',
+            u'title': u'Education engineering days - lightning talks',
+            u'duration': 3741,
+            u'uploader': u'pixelversity',
+            u'uploader_id': u'344706',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
+            '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
+        info_json = self._download_webpage(info_url, video_id)
+        info = json.loads(info_json)['result']
+
+        return {
+            'id': video_id,
+            'title': info['title'],
+            'url': info['url'],
+            'thumbnail': info.get('preview'),
+            'duration': int(info['length']),
+            'view_count': int(info['views_total']),
+            'uploader': info['username'],
+            'uploader_id': info['uid'],
+        }
+
+
+class BambuserChannelIE(InfoExtractor):
+    IE_NAME = u'bambuser:channel'
+    _VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
+    # The maximum number we can get with each request
+    _STEP = 50
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user = mobj.group('user')
+        urls = []
+        last_id = ''
+        for i in itertools.count(1):
+            req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
+                '&sort=created&access_mode=0%2C1%2C2&limit={count}'
+                '&method=broadcast&format=json&vid_older_than={last}'
+                ).format(user=user, count=self._STEP, last=last_id)
+            req = compat_urllib_request.Request(req_url)
+            # Without setting this header, we wouldn't get any result
+            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
+            info_json = self._download_webpage(req, user,
+                u'Downloading page %d' % i)
+            results = json.loads(info_json)['result']
+            if len(results) == 0:
+                break
+            last_id = results[-1]['vid']
+            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
+
+        return {
+            '_type': 'playlist',
+            'title': user,
+            'entries': urls,
+        }

+ 49 - 12
youtube_dl/extractor/brightcove.py

@@ -9,10 +9,13 @@ from ..utils import (
     compat_urllib_parse,
     compat_urllib_parse,
     find_xpath_attr,
     find_xpath_attr,
     compat_urlparse,
     compat_urlparse,
+    compat_str,
+    compat_urllib_request,
 
 
     ExtractorError,
     ExtractorError,
 )
 )
 
 
+
 class BrightcoveIE(InfoExtractor):
 class BrightcoveIE(InfoExtractor):
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -23,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
             # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
             # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
             u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
             u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
             u'file': u'2371591881001.mp4',
             u'file': u'2371591881001.mp4',
-            u'md5': u'9e80619e0a94663f0bdc849b4566af19',
+            u'md5': u'8eccab865181d29ec2958f32a6a754f5',
             u'note': u'Test Brightcove downloads and detection in GenericIE',
             u'note': u'Test Brightcove downloads and detection in GenericIE',
             u'info_dict': {
             u'info_dict': {
                 u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
                 u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
@@ -41,6 +44,17 @@ class BrightcoveIE(InfoExtractor):
                 u'uploader': u'Oracle',
                 u'uploader': u'Oracle',
             },
             },
         },
         },
+        {
+            # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
+            u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
+            u'info_dict': {
+                u'id': u'2750934548001',
+                u'ext': u'mp4',
+                u'title': u'This Bracelet Acts as a Personal Thermostat',
+                u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
+                u'uploader': u'Mashable',
+            },
+        },
     ]
     ]
 
 
     @classmethod
     @classmethod
@@ -68,24 +82,48 @@ class BrightcoveIE(InfoExtractor):
         videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
         videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
         if videoPlayer is not None:
         if videoPlayer is not None:
             params['@videoPlayer'] = videoPlayer.attrib['value']
             params['@videoPlayer'] = videoPlayer.attrib['value']
+        linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
+        if linkBase is not None:
+            params['linkBaseURL'] = linkBase.attrib['value']
         data = compat_urllib_parse.urlencode(params)
         data = compat_urllib_parse.urlencode(params)
         return cls._FEDERATED_URL_TEMPLATE % data
         return cls._FEDERATED_URL_TEMPLATE % data
 
 
+    @classmethod
+    def _extract_brightcove_url(cls, webpage):
+        """Try to extract the brightcove url from the webpage, returns None
+        if it can't be found
+        """
+        m_brightcove = re.search(
+            r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
+            webpage, re.DOTALL)
+        if m_brightcove is not None:
+            return cls._build_brighcove_url(m_brightcove.group())
+        else:
+            return None
+
     def _real_extract(self, url):
     def _real_extract(self, url):
+        # Change the 'videoId' and other equivalent fields to '@videoPlayer'
+        url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
+        # Change bckey (used by bcove.me urls) to playerKey
+        url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
         query_str = mobj.group('query')
         query_str = mobj.group('query')
         query = compat_urlparse.parse_qs(query_str)
         query = compat_urlparse.parse_qs(query_str)
 
 
         videoPlayer = query.get('@videoPlayer')
         videoPlayer = query.get('@videoPlayer')
         if videoPlayer:
         if videoPlayer:
-            return self._get_video_info(videoPlayer[0], query_str)
+            return self._get_video_info(videoPlayer[0], query_str, query)
         else:
         else:
             player_key = query['playerKey']
             player_key = query['playerKey']
             return self._get_playlist_info(player_key[0])
             return self._get_playlist_info(player_key[0])
 
 
-    def _get_video_info(self, video_id, query):
-        request_url = self._FEDERATED_URL_TEMPLATE % query
-        webpage = self._download_webpage(request_url, video_id)
+    def _get_video_info(self, video_id, query_str, query):
+        request_url = self._FEDERATED_URL_TEMPLATE % query_str
+        req = compat_urllib_request.Request(request_url)
+        linkBase = query.get('linkBaseURL')
+        if linkBase is not None:
+            req.add_header('Referer', linkBase[0])
+        webpage = self._download_webpage(req, video_id)
 
 
         self.report_extraction(video_id)
         self.report_extraction(video_id)
         info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
         info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
@@ -109,7 +147,7 @@ class BrightcoveIE(InfoExtractor):
 
 
     def _extract_video_info(self, video_info):
     def _extract_video_info(self, video_info):
         info = {
         info = {
-            'id': video_info['id'],
+            'id': compat_str(video_info['id']),
             'title': video_info['displayName'],
             'title': video_info['displayName'],
             'description': video_info.get('shortDescription'),
             'description': video_info.get('shortDescription'),
             'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
             'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
@@ -119,15 +157,14 @@ class BrightcoveIE(InfoExtractor):
         renditions = video_info.get('renditions')
         renditions = video_info.get('renditions')
         if renditions:
         if renditions:
             renditions = sorted(renditions, key=lambda r: r['size'])
             renditions = sorted(renditions, key=lambda r: r['size'])
-            best_format = renditions[-1]
-            info.update({
-                'url': best_format['defaultURL'],
-                'ext': 'mp4',
-            })
+            info['formats'] = [{
+                'url': rend['defaultURL'],
+                'height': rend.get('frameHeight'),
+                'width': rend.get('frameWidth'),
+            } for rend in renditions]
         elif video_info.get('FLVFullLengthURL') is not None:
         elif video_info.get('FLVFullLengthURL') is not None:
             info.update({
             info.update({
                 'url': video_info['FLVFullLengthURL'],
                 'url': video_info['FLVFullLengthURL'],
-                'ext': 'flv',
             })
             })
         else:
         else:
             raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
             raise ExtractorError(u'Unable to extract video url for %s' % info['id'])

+ 4 - 2
youtube_dl/extractor/canalc2.py

@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 
 class Canalc2IE(InfoExtractor):
 class Canalc2IE(InfoExtractor):
     IE_NAME = 'canalc2.tv'
     IE_NAME = 'canalc2.tv'
-    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
+    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
 
 
     _TEST = {
     _TEST = {
         u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
         u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
@@ -18,7 +18,9 @@ class Canalc2IE(InfoExtractor):
     }
     }
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        video_id = re.match(self._VALID_URL, url).group(1)
+        video_id = re.match(self._VALID_URL, url).group('id')
+        # We need to set the voir field for getting the file name
+        url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
         file_name = self._search_regex(
         file_name = self._search_regex(
             r"so\.addVariable\('file','(.*?)'\);",
             r"so\.addVariable\('file','(.*?)'\);",

+ 3 - 1
youtube_dl/extractor/cinemassacre.py

@@ -41,7 +41,7 @@ class CinemassacreIE(InfoExtractor):
         webpage_url = u'http://' + mobj.group('url')
         webpage_url = u'http://' + mobj.group('url')
         webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
         webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
         if not mobj:
         if not mobj:
             raise ExtractorError(u'Can\'t extract embed url and video id')
             raise ExtractorError(u'Can\'t extract embed url and video id')
         playerdata_url = mobj.group(u'embed_url')
         playerdata_url = mobj.group(u'embed_url')
@@ -65,6 +65,7 @@ class CinemassacreIE(InfoExtractor):
             {
             {
                 'url': url,
                 'url': url,
                 'play_path': 'mp4:' + sd_file,
                 'play_path': 'mp4:' + sd_file,
+                'rtmp_live': True, # workaround
                 'ext': 'flv',
                 'ext': 'flv',
                 'format': 'sd',
                 'format': 'sd',
                 'format_id': 'sd',
                 'format_id': 'sd',
@@ -72,6 +73,7 @@ class CinemassacreIE(InfoExtractor):
             {
             {
                 'url': url,
                 'url': url,
                 'play_path': 'mp4:' + hd_file,
                 'play_path': 'mp4:' + hd_file,
+                'rtmp_live': True, # workaround
                 'ext': 'flv',
                 'ext': 'flv',
                 'format': 'hd',
                 'format': 'hd',
                 'format_id': 'hd',
                 'format_id': 'hd',

+ 1 - 1
youtube_dl/extractor/cnn.py

@@ -6,7 +6,7 @@ from ..utils import determine_ext
 
 
 
 
 class CNNIE(InfoExtractor):
 class CNNIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
+    _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
         (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
         (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
 
 
     _TESTS = [{
     _TESTS = [{

+ 21 - 6
youtube_dl/extractor/common.py

@@ -63,7 +63,7 @@ class InfoExtractor(object):
                     * ext       Will be calculated from url if missing
                     * ext       Will be calculated from url if missing
                     * format    A human-readable description of the format
                     * format    A human-readable description of the format
                                 ("mp4 container with h264/opus").
                                 ("mp4 container with h264/opus").
-                                Calculated from the format_id, width, height 
+                                Calculated from the format_id, width, height.
                                 and format_note fields if missing.
                                 and format_note fields if missing.
                     * format_id A short description of the format
                     * format_id A short description of the format
                                 ("mp4_h264_opus" or "19")
                                 ("mp4_h264_opus" or "19")
@@ -71,6 +71,13 @@ class InfoExtractor(object):
                                 ("3D" or "DASH video")
                                 ("3D" or "DASH video")
                     * width     Width of the video, if known
                     * width     Width of the video, if known
                     * height    Height of the video, if known
                     * height    Height of the video, if known
+                    * abr       Average audio bitrate in KBit/s
+                    * acodec    Name of the audio codec in use
+                    * vbr       Average video bitrate in KBit/s
+                    * vcodec    Name of the video codec in use
+    webpage_url:    The URL of the video webpage; when given to youtube-dl it
+                    should produce the same result again. (It will be set
+                    by YoutubeDL if it's missing.)
 
 
     Unless mentioned otherwise, the fields should be Unicode strings.
     Unless mentioned otherwise, the fields should be Unicode strings.
 
 
@@ -312,13 +319,21 @@ class InfoExtractor(object):
 
 
     # Helper functions for extracting OpenGraph info
     # Helper functions for extracting OpenGraph info
     @staticmethod
     @staticmethod
-    def _og_regex(prop):
-        return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
+    def _og_regexes(prop):
+        content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+        property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
+        template = r'<meta[^>]+?%s[^>]+?%s'
+        return [
+            template % (property_re, content_re),
+            template % (content_re, property_re),
+        ]
 
 
     def _og_search_property(self, prop, html, name=None, **kargs):
     def _og_search_property(self, prop, html, name=None, **kargs):
         if name is None:
         if name is None:
             name = 'OpenGraph %s' % prop
             name = 'OpenGraph %s' % prop
-        escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
+        escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
+        if escaped is None:
+            return None
         return unescapeHTML(escaped)
         return unescapeHTML(escaped)
 
 
     def _og_search_thumbnail(self, html, **kargs):
     def _og_search_thumbnail(self, html, **kargs):
@@ -331,8 +346,8 @@ class InfoExtractor(object):
         return self._og_search_property('title', html, **kargs)
         return self._og_search_property('title', html, **kargs)
 
 
     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
-        regexes = [self._og_regex('video')]
-        if secure: regexes.insert(0, self._og_regex('video:secure_url'))
+        regexes = self._og_regexes('video')
+        if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
         return self._html_search_regex(regexes, html, name, **kargs)
 
 
     def _rta_search(self, html):
     def _rta_search(self, html):

+ 23 - 8
youtube_dl/extractor/dailymotion.py

@@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
         """Build a request with the family filter disabled"""
         """Build a request with the family filter disabled"""
         request = compat_urllib_request.Request(url)
         request = compat_urllib_request.Request(url)
         request.add_header('Cookie', 'family_filter=off')
         request.add_header('Cookie', 'family_filter=off')
+        request.add_header('Cookie', 'ff=off')
         return request
         return request
 
 
 class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
 class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -61,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             },
             },
             u'skip': u'VEVO is only available in some countries',
             u'skip': u'VEVO is only available in some countries',
         },
         },
+        # age-restricted video
+        {
+            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+            u'file': u'xyh2zz.mp4',
+            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
+            u'info_dict': {
+                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+                u'uploader': 'HotWaves1012',
+                u'age_limit': 18,
+            }
+
+        }
     ]
     ]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
@@ -90,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
         video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
         video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
                                              # Looking for official user
                                              # Looking for official user
                                              r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
                                              r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
-                                            webpage, 'video uploader')
+                                            webpage, 'video uploader', fatal=False)
+        age_limit = self._rta_search(webpage)
 
 
         video_upload_date = None
         video_upload_date = None
         mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
         mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@ -127,22 +141,23 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             raise ExtractorError(u'Unable to extract video URL')
             raise ExtractorError(u'Unable to extract video URL')
 
 
         # subtitles
         # subtitles
-        video_subtitles = self.extract_subtitles(video_id)
+        video_subtitles = self.extract_subtitles(video_id, webpage)
         if self._downloader.params.get('listsubtitles', False):
         if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id)
+            self._list_available_subtitles(video_id, webpage)
             return
             return
 
 
-        return [{
+        return {
             'id':       video_id,
             'id':       video_id,
             'formats': formats,
             'formats': formats,
             'uploader': video_uploader,
             'uploader': video_uploader,
             'upload_date':  video_upload_date,
             'upload_date':  video_upload_date,
             'title':    self._og_search_title(webpage),
             'title':    self._og_search_title(webpage),
             'subtitles':    video_subtitles,
             'subtitles':    video_subtitles,
-            'thumbnail': info['thumbnail_url']
-        }]
+            'thumbnail': info['thumbnail_url'],
+            'age_limit': age_limit,
+        }
 
 
-    def _get_available_subtitles(self, video_id):
+    def _get_available_subtitles(self, video_id, webpage):
         try:
         try:
             sub_list = self._download_webpage(
             sub_list = self._download_webpage(
                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
@@ -171,7 +186,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
             webpage = self._download_webpage(request,
             webpage = self._download_webpage(request,
                                              id, u'Downloading page %s' % pagenum)
                                              id, u'Downloading page %s' % pagenum)
 
 
-            playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
+            playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
             video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
             video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
 
 
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:

+ 1 - 1
youtube_dl/extractor/depositfiles.py

@@ -25,7 +25,7 @@ class DepositFilesIE(InfoExtractor):
         url = 'http://depositfiles.com/en/files/' + file_id
         url = 'http://depositfiles.com/en/files/' + file_id
 
 
         # Retrieve file webpage with 'Free download' button pressed
         # Retrieve file webpage with 'Free download' button pressed
-        free_download_indication = { 'gateway_result' : '1' }
+        free_download_indication = {'gateway_result' : '1'}
         request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
         request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
         try:
         try:
             self.report_download_webpage(file_id)
             self.report_download_webpage(file_id)

+ 37 - 0
youtube_dl/extractor/eitb.py

@@ -0,0 +1,37 @@
+# encoding: utf-8
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from ..utils import ExtractorError
+
+
+class EitbIE(InfoExtractor):
+    IE_NAME = u'eitb.tv'
+    _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
+
+    _TEST = {
+        u'add_ie': ['Brightcove'],
+        u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
+        u'md5': u'edf4436247185adee3ea18ce64c47998',
+        u'info_dict': {
+            u'id': u'2743577154001',
+            u'ext': u'mp4',
+            u'title': u'60 minutos (Lasa y Zabala, 30 años)',
+            # All videos from eitb have this description in the brightcove info
+            u'description': u'.',
+            u'uploader': u'Euskal Telebista',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        chapter_id = mobj.group('chapter_id')
+        webpage = self._download_webpage(url, chapter_id)
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if bc_url is None:
+            raise ExtractorError(u'Could not extract the Brightcove url')
+        # The BrightcoveExperience object doesn't contain the video id, so we
+        # set it manually
+        bc_url += '&%40videoPlayer={0}'.format(chapter_id)
+        return self.url_result(bc_url, BrightcoveIE.ie_key())

+ 2 - 0
youtube_dl/extractor/exfm.py

@@ -21,6 +21,7 @@ class ExfmIE(InfoExtractor):
                 u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
                 u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
             },
             },
             u'note': u'Soundcloud song',
             u'note': u'Soundcloud song',
+            u'skip': u'The site is down too often',
         },
         },
         {
         {
             u'url': u'http://ex.fm/song/wddt8',
             u'url': u'http://ex.fm/song/wddt8',
@@ -30,6 +31,7 @@ class ExfmIE(InfoExtractor):
                 u'title': u'Safe and Sound',
                 u'title': u'Safe and Sound',
                 u'uploader': u'Capital Cities',
                 u'uploader': u'Capital Cities',
             },
             },
+            u'skip': u'The site is down too often',
         },
         },
     ]
     ]
 
 

+ 50 - 0
youtube_dl/extractor/extremetube.py

@@ -0,0 +1,50 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+class ExtremeTubeIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _TEST = {
+        u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
+        u'file': u'652431.mp4',
+        u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
+        u'info_dict': {
+            u"title": u"Music Video 14 british euro brit european cumshots swallow",
+            u"uploader": u"unknown",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
+        uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
+        format = path.split('/')[5].split('_')[:2]
+        format = "-".join(format)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'uploader': uploader,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': 18,
+        }

+ 38 - 0
youtube_dl/extractor/gamekings.py

@@ -0,0 +1,38 @@
+import re
+
+from .common import InfoExtractor
+
+
+class GamekingsIE(InfoExtractor):
+    _VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
+    _TEST = {
+        u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
+        u'file': u'20130811.mp4',
+        # MD5 is flaky, seems to change regularly
+        #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
+        u'info_dict': {
+            u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
+            u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
+        }
+    }
+
+    def _real_extract(self, url):
+
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        webpage = self._download_webpage(url, name)
+        video_url = self._og_search_video_url(webpage)
+
+        video = re.search(r'[0-9]+', video_url)
+        video_id = video.group(0)
+
+        # Todo: add medium format
+        video_url = video_url.replace(video_id, 'large/' + video_id)
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+        }

+ 23 - 5
youtube_dl/extractor/generic.py

@@ -33,6 +33,7 @@ class GenericIE(InfoExtractor):
         },
         },
         # embedded vimeo video
         # embedded vimeo video
         {
         {
+            u'add_ie': ['Vimeo'],
             u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
             u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
             u'file': u'22444065.mp4',
             u'file': u'22444065.mp4',
             u'md5': u'2903896e23df39722c33f015af0666e2',
             u'md5': u'2903896e23df39722c33f015af0666e2',
@@ -44,6 +45,7 @@ class GenericIE(InfoExtractor):
         },
         },
         # bandcamp page with custom domain
         # bandcamp page with custom domain
         {
         {
+            u'add_ie': ['Bandcamp'],
             u'url': u'http://bronyrock.com/track/the-pony-mash',
             u'url': u'http://bronyrock.com/track/the-pony-mash',
             u'file': u'3235767654.mp3',
             u'file': u'3235767654.mp3',
             u'info_dict': {
             u'info_dict': {
@@ -52,6 +54,23 @@ class GenericIE(InfoExtractor):
             },
             },
             u'skip': u'There is a limit of 200 free downloads / month for the test song',
             u'skip': u'There is a limit of 200 free downloads / month for the test song',
         },
         },
+        # embedded brightcove video
+        # it also tests brightcove videos that need to set the 'Referer' in the
+        # http requests
+        {
+            u'add_ie': ['Brightcove'],
+            u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
+            u'info_dict': {
+                u'id': u'2765128793001',
+                u'ext': u'mp4',
+                u'title': u'Le cours de bourse : l’analyse technique',
+                u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
+                u'uploader': u'BFM BUSINESS',
+            },
+            u'params': {
+                u'skip_download': True,
+            },
+        },
     ]
     ]
 
 
     def report_download_webpage(self, video_id):
     def report_download_webpage(self, video_id):
@@ -144,10 +163,9 @@ class GenericIE(InfoExtractor):
 
 
         self.report_extraction(video_id)
         self.report_extraction(video_id)
         # Look for BrightCove:
         # Look for BrightCove:
-        m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
-        if m_brightcove is not None:
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if bc_url is not None:
             self.to_screen(u'Brightcove video detected.')
             self.to_screen(u'Brightcove video detected.')
-            bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
             return self.url_result(bc_url, 'Brightcove')
             return self.url_result(bc_url, 'Brightcove')
 
 
         # Look for embedded Vimeo player
         # Look for embedded Vimeo player
@@ -160,9 +178,9 @@ class GenericIE(InfoExtractor):
 
 
         # Look for embedded YouTube player
         # Look for embedded YouTube player
         mobj = re.search(
         mobj = re.search(
-            r'<iframe[^>]+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage)
         if mobj:
         if mobj:
-            surl = unescapeHTML(mobj.group(1))
+            surl = unescapeHTML(mobj.group(u'url'))
             return self.url_result(surl, 'Youtube')
             return self.url_result(surl, 'Youtube')
 
 
         # Look for Bandcamp pages with custom domain
         # Look for Bandcamp pages with custom domain

+ 2 - 2
youtube_dl/extractor/hypem.py

@@ -30,7 +30,7 @@ class HypemIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
         track_id = mobj.group(1)
         track_id = mobj.group(1)
 
 
-        data = { 'ax': 1, 'ts': time.time() }
+        data = {'ax': 1, 'ts': time.time()}
         data_encoded = compat_urllib_parse.urlencode(data)
         data_encoded = compat_urllib_parse.urlencode(data)
         complete_url = url + "?" + data_encoded
         complete_url = url + "?" + data_encoded
         request = compat_urllib_request.Request(complete_url)
         request = compat_urllib_request.Request(complete_url)
@@ -68,4 +68,4 @@ class HypemIE(InfoExtractor):
             'ext':      "mp3",
             'ext':      "mp3",
             'title':    title,
             'title':    title,
             'artist':   artist,
             'artist':   artist,
-        }]
+        }]

+ 6 - 1
youtube_dl/extractor/kankan.py

@@ -1,8 +1,10 @@
 import re
 import re
+import hashlib
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import determine_ext
 from ..utils import determine_ext
 
 
+_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
 
 class KankanIE(InfoExtractor):
 class KankanIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
     _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
@@ -30,7 +32,10 @@ class KankanIE(InfoExtractor):
                                                  video_id, u'Downloading video url info')
                                                  video_id, u'Downloading video url info')
         ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
         ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
         path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
         path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
-        video_url = 'http://%s%s' % (ip, path)
+        param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
+        param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
+        key = _md5('xl_mp43651' + param1 + param2)
+        video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
 
 
         return {'id': video_id,
         return {'id': video_id,
                 'title': title,
                 'title': title,

+ 4 - 4
youtube_dl/extractor/keezmovies.py

@@ -12,7 +12,7 @@ from ..aes import (
 )
 )
 
 
 class KeezMoviesIE(InfoExtractor):
 class KeezMoviesIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
     _TEST = {
     _TEST = {
         u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
         u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
         u'file': u'1214711.mp4',
         u'file': u'1214711.mp4',
@@ -43,10 +43,10 @@ class KeezMoviesIE(InfoExtractor):
         if webpage.find('encrypted=true')!=-1:
         if webpage.find('encrypted=true')!=-1:
             password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
             password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
             video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
             video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
-        path = compat_urllib_parse_urlparse( video_url ).path
-        extension = os.path.splitext( path )[1][1:]
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[4].split('_')[:2]
         format = path.split('/')[4].split('_')[:2]
-        format = "-".join( format )
+        format = "-".join(format)
 
 
         age_limit = self._rta_search(webpage)
         age_limit = self._rta_search(webpage)
 
 

+ 47 - 7
youtube_dl/extractor/livestream.py

@@ -1,16 +1,19 @@
 import re
 import re
 import json
 import json
+import xml.etree.ElementTree
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_parse_urlparse,
     compat_urlparse,
     compat_urlparse,
     get_meta_content,
     get_meta_content,
+    xpath_with_ns,
     ExtractorError,
     ExtractorError,
 )
 )
 
 
 
 
 class LivestreamIE(InfoExtractor):
 class LivestreamIE(InfoExtractor):
+    IE_NAME = u'livestream'
     _VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
     _VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
     _TEST = {
     _TEST = {
         u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
         u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
@@ -40,13 +43,9 @@ class LivestreamIE(InfoExtractor):
 
 
         if video_id is None:
         if video_id is None:
             # This is an event page:
             # This is an event page:
-            player = get_meta_content('twitter:player', webpage)
-            if player is None:
-                raise ExtractorError('Couldn\'t extract event api url')
-            api_url = player.replace('/player', '')
-            api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
-            info = json.loads(self._download_webpage(api_url, event_name,
-                                                     u'Downloading event info'))
+            config_json = self._search_regex(r'window.config = ({.*?});',
+                webpage, u'window config')
+            info = json.loads(config_json)['event']
             videos = [self._extract_video_info(video_data['data'])
             videos = [self._extract_video_info(video_data['data'])
                 for video_data in info['feed']['data'] if video_data['type'] == u'video']
                 for video_data in info['feed']['data'] if video_data['type'] == u'video']
             return self.playlist_result(videos, info['id'], info['full_name'])
             return self.playlist_result(videos, info['id'], info['full_name'])
@@ -58,3 +57,44 @@ class LivestreamIE(InfoExtractor):
             info = json.loads(self._download_webpage(api_url, video_id,
             info = json.loads(self._download_webpage(api_url, video_id,
                                                      u'Downloading video info'))
                                                      u'Downloading video info'))
             return self._extract_video_info(info)
             return self._extract_video_info(info)
+
+
+# The original version of Livestream uses a different system
+class LivestreamOriginalIE(InfoExtractor):
+    IE_NAME = u'livestream:original'
+    _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
+    _TEST = {
+        u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+        u'info_dict': {
+            u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+            u'ext': u'flv',
+            u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
+        },
+        u'params': {
+            # rtmp
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        user = mobj.group('user')
+        api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
+
+        api_response = self._download_webpage(api_url, video_id)
+        info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
+        item = info.find('channel').find('item')
+        ns = {'media': 'http://search.yahoo.com/mrss'}
+        thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
+        # Remove the extension and number from the path (like 1.jpg)
+        path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
+
+        return {
+            'id': video_id,
+            'title': item.find('title').text,
+            'url': 'rtmp://extondemand.livestream.com/ondemand',
+            'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
+            'ext': 'flv',
+            'thumbnail': thumbnail_url,
+        }

+ 46 - 5
youtube_dl/extractor/metacafe.py

@@ -20,7 +20,9 @@ class MetacafeIE(InfoExtractor):
     _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
     _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
     _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
     _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
     IE_NAME = u'metacafe'
     IE_NAME = u'metacafe'
-    _TESTS = [{
+    _TESTS = [
+    # Youtube video
+    {
         u"add_ie": ["Youtube"],
         u"add_ie": ["Youtube"],
         u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
         u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
         u"file":  u"_aUehQsCQtM.mp4",
         u"file":  u"_aUehQsCQtM.mp4",
@@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor):
             u"uploader_id": u"PBS"
             u"uploader_id": u"PBS"
         }
         }
     },
     },
+    # Normal metacafe video
+    {
+        u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
+        u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
+        u'info_dict': {
+            u'id': u'11121940',
+            u'ext': u'mp4',
+            u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
+            u'uploader': u'ign',
+            u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+        },
+    },
+    # AnyClip video
     {
     {
         u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
         u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
         u"file": u"an-dVVXnuY7Jh77J.mp4",
         u"file": u"an-dVVXnuY7Jh77J.mp4",
         u"info_dict": {
         u"info_dict": {
             u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
             u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
             u"uploader": u"anyclip",
             u"uploader": u"anyclip",
-            u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
-        }
-    }]
+            u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
+        },
+    },
+    # age-restricted video
+    {
+        u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
+        u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
+        u'info_dict': {
+            u'id': u'5186653',
+            u'ext': u'mp4',
+            u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
+            u'uploader': u'Dwayne Pipe',
+            u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
+            u'age_limit': 18,
+        },
+    },
+    ]
 
 
 
 
     def report_disclaimer(self):
     def report_disclaimer(self):
@@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor):
             'submit': "Continue - I'm over 18",
             'submit': "Continue - I'm over 18",
             }
             }
         request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
         request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         try:
         try:
             self.report_age_confirmation()
             self.report_age_confirmation()
             compat_urllib_request.urlopen(request).read()
             compat_urllib_request.urlopen(request).read()
@@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor):
 
 
         # Retrieve video webpage to extract further information
         # Retrieve video webpage to extract further information
         req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
         req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
-        req.headers['Cookie'] = 'flashVersion=0;'
+
+        # AnyClip videos require the flashversion cookie so that we get the link
+        # to the mp4 file
+        mobj_an = re.match(r'^an-(.*?)$', video_id)
+        if mobj_an:
+            req.headers['Cookie'] = 'flashVersion=0;'
         webpage = self._download_webpage(req, video_id)
         webpage = self._download_webpage(req, video_id)
 
 
         # Extract URL, uploader and title from webpage
         # Extract URL, uploader and title from webpage
@@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor):
                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                 webpage, u'uploader nickname', fatal=False)
                 webpage, u'uploader nickname', fatal=False)
 
 
+        if re.search(r'"contentRating":"restricted"', webpage) is not None:
+            age_limit = 18
+        else:
+            age_limit = 0
+
         return {
         return {
             '_type':    'video',
             '_type':    'video',
             'id':       video_id,
             'id':       video_id,
@@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor):
             'upload_date':  None,
             'upload_date':  None,
             'title':    video_title,
             'title':    video_title,
             'ext':      video_ext,
             'ext':      video_ext,
+            'age_limit': age_limit,
         }
         }

+ 49 - 0
youtube_dl/extractor/mofosex.py

@@ -0,0 +1,49 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+class MofosexIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
+    _TEST = {
+        u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
+        u'file': u'5018.mp4',
+        u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
+        u'info_dict': {
+            u"title": u"Japanese Teen Music Video",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
+        format = path.split('/')[5].split('_')[:2]
+        format = "-".join(format)
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': age_limit,
+        }

+ 4 - 1
youtube_dl/extractor/mtv.py

@@ -26,6 +26,7 @@ class MTVIE(InfoExtractor):
             },
             },
         },
         },
         {
         {
+            u'add_ie': ['Vevo'],
             u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
             u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
             u'file': u'USCJY1331283.mp4',
             u'file': u'USCJY1331283.mp4',
             u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
             u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
@@ -47,7 +48,7 @@ class MTVIE(InfoExtractor):
     def _transform_rtmp_url(rtmp_video_url):
     def _transform_rtmp_url(rtmp_video_url):
         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
         if not m:
         if not m:
-            raise ExtractorError(u'Cannot transform RTMP url')
+            return rtmp_video_url
         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
         return base + m.group('finalid')
         return base + m.group('finalid')
 
 
@@ -80,6 +81,8 @@ class MTVIE(InfoExtractor):
         video_id = self._id_from_uri(uri)
         video_id = self._id_from_uri(uri)
         self.report_extraction(video_id)
         self.report_extraction(video_id)
         mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
         mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
+        # Remove the templates, like &device={device}
+        mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
         if 'acceptMethods' not in mediagen_url:
         if 'acceptMethods' not in mediagen_url:
             mediagen_url += '&acceptMethods=fms'
             mediagen_url += '&acceptMethods=fms'
         mediagen_page = self._download_webpage(mediagen_url, video_id,
         mediagen_page = self._download_webpage(mediagen_url, video_id,

+ 48 - 0
youtube_dl/extractor/myspace.py

@@ -0,0 +1,48 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+)
+
+
+class MySpaceIE(InfoExtractor):
+    _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
+        u'info_dict': {
+            u'id': u'100008689',
+            u'ext': u'flv',
+            u'title': u'Viva La Vida',
+            u'description': u'The official Viva La Vida video, directed by Hype Williams',
+            u'uploader': u'Coldplay',
+            u'uploader_id': u'coldplay',
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
+            u'context'))
+        video = context['video']
+        rtmp_url, play_path = video['streamUrl'].split(';', 1)
+
+        return {
+            'id': compat_str(video['mediaId']),
+            'title': video['title'],
+            'url': rtmp_url,
+            'play_path': play_path,
+            'ext': 'flv',
+            'description': video['description'],
+            'thumbnail': video['imageUrl'],
+            'uploader': video['artistName'],
+            'uploader_id': video['artistUsername'],
+        }

+ 3 - 3
youtube_dl/extractor/pornhub.py

@@ -47,10 +47,10 @@ class PornHubIE(InfoExtractor):
 
 
         formats = []
         formats = []
         for video_url in video_urls:
         for video_url in video_urls:
-            path = compat_urllib_parse_urlparse( video_url ).path
-            extension = os.path.splitext( path )[1][1:]
+            path = compat_urllib_parse_urlparse(video_url).path
+            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[5].split('_')[:2]
             format = path.split('/')[5].split('_')[:2]
-            format = "-".join( format )
+            format = "-".join(format)
             formats.append({
             formats.append({
                 'url': video_url,
                 'url': video_url,
                 'ext': extension,
                 'ext': extension,

+ 3 - 1
youtube_dl/extractor/redtube.py

@@ -8,7 +8,9 @@ class RedTubeIE(InfoExtractor):
     _TEST = {
     _TEST = {
         u'url': u'http://www.redtube.com/66418',
         u'url': u'http://www.redtube.com/66418',
         u'file': u'66418.mp4',
         u'file': u'66418.mp4',
-        u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
+        # md5 varies from time to time, as in
+        # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
+        #u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
         u'info_dict': {
         u'info_dict': {
             u"title": u"Sucked on a toilet",
             u"title": u"Sucked on a toilet",
             u"age_limit": 18,
             u"age_limit": 18,

+ 0 - 12
youtube_dl/extractor/rtlnow.py

@@ -62,18 +62,6 @@ class RTLnowIE(InfoExtractor):
             u'skip_download': True,
             u'skip_download': True,
         },
         },
     },
     },
-    {
-        u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1',
-        u'file': u'129679.flv',
-        u'info_dict': {
-            u'upload_date': u'20131016', 
-            u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...',
-            u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig',
-        },
-        u'params': {
-            u'skip_download': True,
-        },
-    },
     {
     {
         u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
         u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
         u'file': u'124903.flv',
         u'file': u'124903.flv',

+ 1 - 0
youtube_dl/extractor/slashdot.py

@@ -7,6 +7,7 @@ class SlashdotIE(InfoExtractor):
     _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
     _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
 
 
     _TEST = {
     _TEST = {
+        u'add_ie': ['Ooyala'],
         u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
         u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
         u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
         u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
         u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
         u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',

+ 51 - 81
youtube_dl/extractor/soundcloud.py

@@ -29,17 +29,34 @@ class SoundcloudIE(InfoExtractor):
                     )
                     )
                     '''
                     '''
     IE_NAME = u'soundcloud'
     IE_NAME = u'soundcloud'
-    _TEST = {
-        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
-        u'file': u'62986583.mp3',
-        u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
-        u'info_dict': {
-            u"upload_date": u"20121011", 
-            u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", 
-            u"uploader": u"E.T. ExTerrestrial Music", 
-            u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
-        }
-    }
+    _TESTS = [
+        {
+            u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+            u'file': u'62986583.mp3',
+            u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
+            u'info_dict': {
+                u"upload_date": u"20121011", 
+                u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", 
+                u"uploader": u"E.T. ExTerrestrial Music", 
+                u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+            }
+        },
+        # not streamable song
+        {
+            u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+            u'info_dict': {
+                u'id': u'47127627',
+                u'ext': u'mp3',
+                u'title': u'Goldrushed',
+                u'uploader': u'The Royal Concept',
+                u'upload_date': u'20120521',
+            },
+            u'params': {
+                # rtmp
+                u'skip_download': True,
+            },
+        },
+    ]
 
 
     _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
     _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
 
 
@@ -56,24 +73,39 @@ class SoundcloudIE(InfoExtractor):
         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
 
 
     def _extract_info_dict(self, info, full_title=None, quiet=False):
     def _extract_info_dict(self, info, full_title=None, quiet=False):
-        video_id = info['id']
-        name = full_title or video_id
+        track_id = compat_str(info['id'])
+        name = full_title or track_id
         if quiet == False:
         if quiet == False:
             self.report_extraction(name)
             self.report_extraction(name)
 
 
         thumbnail = info['artwork_url']
         thumbnail = info['artwork_url']
         if thumbnail is not None:
         if thumbnail is not None:
             thumbnail = thumbnail.replace('-large', '-t500x500')
             thumbnail = thumbnail.replace('-large', '-t500x500')
-        return {
-            'id':       info['id'],
+        result = {
+            'id':       track_id,
             'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID,
             'url':      info['stream_url'] + '?client_id=' + self._CLIENT_ID,
             'uploader': info['user']['username'],
             'uploader': info['user']['username'],
             'upload_date': unified_strdate(info['created_at']),
             'upload_date': unified_strdate(info['created_at']),
             'title':    info['title'],
             'title':    info['title'],
-            'ext':      u'mp3',
+            'ext':      info.get('original_format', u'mp3'),
             'description': info['description'],
             'description': info['description'],
             'thumbnail': thumbnail,
             'thumbnail': thumbnail,
         }
         }
+        if info.get('downloadable', False):
+            result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
+        if not info.get('streamable', False):
+            # We have to get the rtmp url
+            stream_json = self._download_webpage(
+                'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID),
+                track_id, u'Downloading track url')
+            rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url']
+            # The url doesn't have an rtmp app, we have to extract the playpath
+            url, path = rtmp_url.split('mp3:', 1)
+            result.update({
+                'url': url,
+                'play_path': 'mp3:' + path,
+            })
+        return result
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
         mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
@@ -106,70 +138,8 @@ class SoundcloudIE(InfoExtractor):
 class SoundcloudSetIE(SoundcloudIE):
 class SoundcloudSetIE(SoundcloudIE):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
     _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
     IE_NAME = u'soundcloud:set'
     IE_NAME = u'soundcloud:set'
-    _TEST = {
-        u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
-        u"playlist": [
-            {
-                u"file":"30510138.mp3",
-                u"md5":"f9136bf103901728f29e419d2c70f55d",
-                u"info_dict": {
-                    u"upload_date": u"20111213",
-                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"D-D-Dance"
-                }
-            },
-            {
-                u"file":"47127625.mp3",
-                u"md5":"09b6758a018470570f8fd423c9453dd8",
-                u"info_dict": {
-                    u"upload_date": u"20120521",
-                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"The Royal Concept - Gimme Twice"
-                }
-            },
-            {
-                u"file":"47127627.mp3",
-                u"md5":"154abd4e418cea19c3b901f1e1306d9c",
-                u"info_dict": {
-                    u"upload_date": u"20120521",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"Goldrushed"
-                }
-            },
-            {
-                u"file":"47127629.mp3",
-                u"md5":"2f5471edc79ad3f33a683153e96a79c1",
-                u"info_dict": {
-                    u"upload_date": u"20120521",
-                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"In the End"
-                }
-            },
-            {
-                u"file":"47127631.mp3",
-                u"md5":"f9ba87aa940af7213f98949254f1c6e2",
-                u"info_dict": {
-                    u"upload_date": u"20120521",
-                    u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"Knocked Up"
-                }
-            },
-            {
-                u"file":"75206121.mp3",
-                u"md5":"f9d1fe9406717e302980c30de4af9353",
-                u"info_dict": {
-                    u"upload_date": u"20130116",
-                    u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central).  \r\nAs a gift to our fans we would like to offer you a free download of the track!  ",
-                    u"uploader": u"The Royal Concept",
-                    u"title": u"World On Fire"
-                }
-            }
-        ]
-    }
+    # it's in test/test_playlists.py
+    _TESTS = []
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
@@ -208,7 +178,7 @@ class SoundcloudUserIE(SoundcloudIE):
     IE_NAME = u'soundcloud:user'
     IE_NAME = u'soundcloud:user'
 
 
     # it's in tests/test_playlists.py
     # it's in tests/test_playlists.py
-    _TEST = None
+    _TESTS = []
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)

+ 19 - 6
youtube_dl/extractor/southparkstudios.py

@@ -5,21 +5,19 @@ from .mtv import MTVIE, _media_xml_tag
 
 
 class SouthParkStudiosIE(MTVIE):
 class SouthParkStudiosIE(MTVIE):
     IE_NAME = u'southparkstudios.com'
     IE_NAME = u'southparkstudios.com'
-    _VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'
+    _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
 
 
     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
 
 
-    _TEST = {
+    # Overwrite MTVIE properties we don't want
+    _TESTS = [{
         u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
         u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
         u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
         u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
         u'info_dict': {
         u'info_dict': {
             u'title': u'Bat Daded',
             u'title': u'Bat Daded',
             u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
             u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
         },
         },
-    }
-
-    # Overwrite MTVIE properties we don't want
-    _TESTS = []
+    }]
 
 
     def _get_thumbnail_url(self, uri, itemdoc):
     def _get_thumbnail_url(self, uri, itemdoc):
         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
@@ -31,8 +29,23 @@ class SouthParkStudiosIE(MTVIE):
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
+        url = u'http://www.' + mobj.group(u'url')
         video_id = mobj.group('id')
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
         mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
         mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
                                   webpage, u'mgid')
                                   webpage, u'mgid')
         return self._get_videos_info(mgid)
         return self._get_videos_info(mgid)
+
+class SouthparkDeIE(SouthParkStudiosIE):
+    IE_NAME = u'southpark.de'
+    _VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
+    _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
+
+    _TESTS = [{
+        u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
+        u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
+        u'info_dict': {
+            u'title': u'The Government Won\'t Respect My Privacy',
+            u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
+        },
+    }]

+ 35 - 0
youtube_dl/extractor/space.py

@@ -0,0 +1,35 @@
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from ..utils import RegexNotFoundError, ExtractorError
+
+
+class SpaceIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
+    _TEST = {
+        u'add_ie': ['Brightcove'],
+        u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
+        u'info_dict': {
+            u'id': u'2780937028001',
+            u'ext': u'mp4',
+            u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
+            u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
+            u'uploader': u'TechMedia Networks',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        try:
+            # Some videos require the playerKey field, which isn't defined in
+            # the BrightcoveExperience object
+            brightcove_url = self._og_search_video_url(webpage)
+        except RegexNotFoundError:
+            # Other videos work fine with the info from the object
+            brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if brightcove_url is None:
+            raise ExtractorError(u'The webpage does not contain a video', expected=True)
+        return self.url_result(brightcove_url, BrightcoveIE.ie_key())

+ 3 - 3
youtube_dl/extractor/spankwire.py

@@ -49,10 +49,10 @@ class SpankwireIE(InfoExtractor):
 
 
         formats = []
         formats = []
         for video_url in video_urls:
         for video_url in video_urls:
-            path = compat_urllib_parse_urlparse( video_url ).path
-            extension = os.path.splitext( path )[1][1:]
+            path = compat_urllib_parse_urlparse(video_url).path
+            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
             format = path.split('/')[4].split('_')[:2]
-            format = "-".join( format )
+            format = "-".join(format)
             formats.append({
             formats.append({
                 'url': video_url,
                 'url': video_url,
                 'ext': extension,
                 'ext': extension,

+ 36 - 14
youtube_dl/extractor/spiegel.py

@@ -2,18 +2,27 @@ import re
 import xml.etree.ElementTree
 import xml.etree.ElementTree
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
+from ..utils import determine_ext
 
 
 
 
 class SpiegelIE(InfoExtractor):
 class SpiegelIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
-    _TEST = {
+    _TESTS = [{
         u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
         u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
         u'file': u'1259285.mp4',
         u'file': u'1259285.mp4',
         u'md5': u'2c2754212136f35fb4b19767d242f66e',
         u'md5': u'2c2754212136f35fb4b19767d242f66e',
         u'info_dict': {
         u'info_dict': {
             u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
             u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
         }
         }
-    }
+    },
+    {
+        u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+        u'file': u'1309159.mp4',
+        u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
+        u'info_dict': {
+            u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
+        }
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         m = re.match(self._VALID_URL, url)
@@ -21,25 +30,38 @@ class SpiegelIE(InfoExtractor):
 
 
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
 
 
-        video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
-            webpage, u'title')
+        video_title = self._html_search_regex(
+            r'<div class="module-title">(.*?)</div>', webpage, u'title')
 
 
         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
-        xml_code = self._download_webpage(xml_url, video_id,
-                    note=u'Downloading XML', errnote=u'Failed to download XML')
+        xml_code = self._download_webpage(
+            xml_url, video_id,
+            note=u'Downloading XML', errnote=u'Failed to download XML')
 
 
         idoc = xml.etree.ElementTree.fromstring(xml_code)
         idoc = xml.etree.ElementTree.fromstring(xml_code)
-        last_type = idoc[-1]
-        filename = last_type.findall('./filename')[0].text
-        duration = float(last_type.findall('./duration')[0].text)
 
 
-        video_url = 'http://video2.spiegel.de/flash/' + filename
-        video_ext = filename.rpartition('.')[2]
+        formats = [
+            {
+                'format_id': n.tag.rpartition('type')[2],
+                'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+                'width': int(n.find('./width').text),
+                'height': int(n.find('./height').text),
+                'abr': int(n.find('./audiobitrate').text),
+                'vbr': int(n.find('./videobitrate').text),
+                'vcodec': n.find('./codec').text,
+                'acodec': 'MP4A',
+            }
+            for n in list(idoc)
+            # Blacklist type 6, it's extremely LQ and not available on the same server
+            if n.tag.startswith('type') and n.tag != 'type6'
+        ]
+        formats.sort(key=lambda f: f['vbr'])
+        duration = float(idoc[0].findall('./duration')[0].text)
+
         info = {
         info = {
             'id': video_id,
             'id': video_id,
-            'url': video_url,
-            'ext': video_ext,
             'title': video_title,
             'title': video_title,
             'duration': duration,
             'duration': duration,
+            'formats': formats,
         }
         }
-        return [info]
+        return info

+ 6 - 6
youtube_dl/extractor/subtitles.py

@@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
         return any([self._downloader.params.get('writesubtitles', False),
         return any([self._downloader.params.get('writesubtitles', False),
                     self._downloader.params.get('writeautomaticsub')])
                     self._downloader.params.get('writeautomaticsub')])
 
 
-    def _list_available_subtitles(self, video_id, webpage=None):
+    def _list_available_subtitles(self, video_id, webpage):
         """ outputs the available subtitles for the video """
         """ outputs the available subtitles for the video """
-        sub_lang_list = self._get_available_subtitles(video_id)
+        sub_lang_list = self._get_available_subtitles(video_id, webpage)
         auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
         auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
         sub_lang = ",".join(list(sub_lang_list.keys()))
         sub_lang = ",".join(list(sub_lang_list.keys()))
         self.to_screen(u'%s: Available subtitles for video: %s' %
         self.to_screen(u'%s: Available subtitles for video: %s' %
@@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
         self.to_screen(u'%s: Available automatic captions for video: %s' %
         self.to_screen(u'%s: Available automatic captions for video: %s' %
                        (video_id, auto_lang))
                        (video_id, auto_lang))
 
 
-    def extract_subtitles(self, video_id, video_webpage=None):
+    def extract_subtitles(self, video_id, webpage):
         """
         """
         returns {sub_lang: sub} ,{} if subtitles not found or None if the
         returns {sub_lang: sub} ,{} if subtitles not found or None if the
         subtitles aren't requested.
         subtitles aren't requested.
@@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
             return None
             return None
         available_subs_list = {}
         available_subs_list = {}
         if self._downloader.params.get('writeautomaticsub', False):
         if self._downloader.params.get('writeautomaticsub', False):
-            available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
+            available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
         if self._downloader.params.get('writesubtitles', False):
         if self._downloader.params.get('writesubtitles', False):
-            available_subs_list.update(self._get_available_subtitles(video_id))
+            available_subs_list.update(self._get_available_subtitles(video_id, webpage))
 
 
         if not available_subs_list:  # error, it didn't get the available subtitles
         if not available_subs_list:  # error, it didn't get the available subtitles
             return {}
             return {}
@@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
             return
             return
         return sub
         return sub
 
 
-    def _get_available_subtitles(self, video_id):
+    def _get_available_subtitles(self, video_id, webpage):
         """
         """
         returns {sub_lang: url} or {} if not available
         returns {sub_lang: url} or {} if not available
         Must be redefined by the subclasses
         Must be redefined by the subclasses

+ 33 - 8
youtube_dl/extractor/teamcoco.py

@@ -1,4 +1,5 @@
 import re
 import re
+import xml.etree.ElementTree
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
@@ -11,7 +12,7 @@ class TeamcocoIE(InfoExtractor):
     _TEST = {
     _TEST = {
         u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
         u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
         u'file': u'19705.mp4',
         u'file': u'19705.mp4',
-        u'md5': u'27b6f7527da5acf534b15f21b032656e',
+        u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
         u'info_dict': {
         u'info_dict': {
             u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", 
             u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", 
             u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
             u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
@@ -31,16 +32,40 @@ class TeamcocoIE(InfoExtractor):
         self.report_extraction(video_id)
         self.report_extraction(video_id)
 
 
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
+        data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
+        data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
 
 
-        video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
-            data, u'video URL')
 
 
-        return [{
+        qualities = ['500k', '480p', '1000k', '720p', '1080p']
+        formats = []
+        for file in data.findall('files/file'):
+            if file.attrib.get('playmode') == 'all':
+                # it just duplicates one of the entries
+                break
+            file_url = file.text
+            m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
+            if m_format is not None:
+                format_id = m_format.group(1)
+            else:
+                format_id = file.attrib['bitrate']
+            formats.append({
+                'url': file_url,
+                'ext': 'mp4',
+                'format_id': format_id,
+            })
+        def sort_key(f):
+            try:
+                return qualities.index(f['format_id'])
+            except ValueError:
+                return -1
+        formats.sort(key=sort_key)
+        if not formats:
+            raise RegexNotFoundError(u'Unable to extract video URL')
+
+        return {
             'id':          video_id,
             'id':          video_id,
-            'url':         video_url,
-            'ext':         'mp4',
+            'formats': formats,
             'title':       self._og_search_title(webpage),
             'title':       self._og_search_title(webpage),
             'thumbnail':   self._og_search_thumbnail(webpage),
             'thumbnail':   self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
             'description': self._og_search_description(webpage),
-        }]
+        }

+ 47 - 25
youtube_dl/extractor/ted.py

@@ -1,10 +1,14 @@
 import json
 import json
 import re
 import re
 
 
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 
 
+from ..utils import (
+    compat_str,
+    RegexNotFoundError,
+)
 
 
-class TEDIE(InfoExtractor):
+class TEDIE(SubtitlesInfoExtractor):
     _VALID_URL=r'''http://www\.ted\.com/
     _VALID_URL=r'''http://www\.ted\.com/
                    (
                    (
                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
@@ -32,33 +36,32 @@ class TEDIE(InfoExtractor):
     def _real_extract(self, url):
     def _real_extract(self, url):
         m=re.match(self._VALID_URL, url, re.VERBOSE)
         m=re.match(self._VALID_URL, url, re.VERBOSE)
         if m.group('type_talk'):
         if m.group('type_talk'):
-            return [self._talk_info(url)]
+            return self._talk_info(url)
         else :
         else :
             playlist_id=m.group('playlist_id')
             playlist_id=m.group('playlist_id')
             name=m.group('name')
             name=m.group('name')
             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
             return [self._playlist_videos_info(url,name,playlist_id)]
             return [self._playlist_videos_info(url,name,playlist_id)]
 
 
-    def _playlist_videos_info(self,url,name,playlist_id=0):
+
+    def _playlist_videos_info(self, url, name, playlist_id):
         '''Returns the videos of the playlist'''
         '''Returns the videos of the playlist'''
-        video_RE=r'''
-                     <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
-                     ([.\s]*?)data-playlist_item_id="(\d+)"
-                     ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
-                     '''
-        video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
-        webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
-        m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
-        m_names=re.finditer(video_name_RE,webpage)
+
+        webpage = self._download_webpage(
+            url, playlist_id, u'Downloading playlist webpage')
+        matches = re.finditer(
+            r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
+            webpage)
 
 
         playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
         playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
                                                  webpage, 'playlist title')
                                                  webpage, 'playlist title')
 
 
-        playlist_entries = []
-        for m_video, m_name in zip(m_videos,m_names):
-            talk_url='http://www.ted.com%s' % m_name.group('talk_url')
-            playlist_entries.append(self.url_result(talk_url, 'TED'))
-        return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title)
+        playlist_entries = [
+            self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
+            for m in matches
+        ]
+        return self.playlist_result(
+            playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
 
 
     def _talk_info(self, url, video_id=0):
     def _talk_info(self, url, video_id=0):
         """Return the video for the talk in the url"""
         """Return the video for the talk in the url"""
@@ -81,16 +84,35 @@ class TEDIE(InfoExtractor):
             'ext': 'mp4',
             'ext': 'mp4',
             'url': stream['file'],
             'url': stream['file'],
             'format': stream['id']
             'format': stream['id']
-            } for stream in info['htmlStreams']]
-        info = {
-            'id': info['id'],
+        } for stream in info['htmlStreams']]
+
+        video_id = info['id']
+
+        # subtitles
+        video_subtitles = self.extract_subtitles(video_id, webpage)
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, webpage)
+            return
+
+        return {
+            'id': video_id,
             'title': title,
             'title': title,
             'thumbnail': thumbnail,
             'thumbnail': thumbnail,
             'description': desc,
             'description': desc,
+            'subtitles': video_subtitles,
             'formats': formats,
             'formats': formats,
         }
         }
 
 
-        # TODO: Remove when #980 has been merged
-        info.update(info['formats'][-1])
-
-        return info
+    def _get_available_subtitles(self, video_id, webpage):
+        try:
+            options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
+            languages = re.findall(r'(?:<option value=")(\S+)"', options)
+            if languages:
+                sub_lang_list = {}
+                for l in languages:
+                    url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
+                    sub_lang_list[l] = url
+                return sub_lang_list
+        except RegexNotFoundError as err:
+            self._downloader.report_warning(u'video doesn\'t have subtitles')
+        return {}

+ 3 - 3
youtube_dl/extractor/tube8.py

@@ -46,10 +46,10 @@ class Tube8IE(InfoExtractor):
         if webpage.find('"encrypted":true')!=-1:
         if webpage.find('"encrypted":true')!=-1:
             password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
             password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
             video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
             video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
-        path = compat_urllib_parse_urlparse( video_url ).path
-        extension = os.path.splitext( path )[1][1:]
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[4].split('_')[:2]
         format = path.split('/')[4].split('_')[:2]
-        format = "-".join( format )
+        format = "-".join(format)
 
 
         return {
         return {
             'id': video_id,
             'id': video_id,

+ 42 - 0
youtube_dl/extractor/tvp.py

@@ -0,0 +1,42 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class TvpIE(InfoExtractor):
+    IE_NAME = u'tvp.pl'
+    _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
+        u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
+        u'file': u'12878238.wmv',
+        u'info_dict': {
+            u'title': u'31.10.2013 - Odcinek 2',
+            u'description': u'31.10.2013 - Odcinek 2',
+        },
+        u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
+        json_params = self._download_webpage(
+            json_url, video_id, u"Downloading video metadata")
+
+        params = json.loads(json_params)
+        self.report_extraction(video_id)
+        video_url = params['video_url']
+
+        title = self._og_search_title(webpage, fatal=True)
+        return {
+            'id': video_id,
+            'title': title,
+            'ext': 'wmv',
+            'url': video_url,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }

+ 73 - 22
youtube_dl/extractor/vevo.py

@@ -5,7 +5,7 @@ import datetime
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
-    determine_ext,
+    compat_HTTPError,
     ExtractorError,
     ExtractorError,
 )
 )
 
 
@@ -16,26 +16,22 @@ class VevoIE(InfoExtractor):
     (currently used by MTVIE)
     (currently used by MTVIE)
     """
     """
     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
     _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
-    _TEST = {
+    _TESTS = [{
         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
         u'file': u'GB1101300280.mp4',
         u'file': u'GB1101300280.mp4',
+        u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
         u'info_dict': {
         u'info_dict': {
             u"upload_date": u"20130624",
             u"upload_date": u"20130624",
             u"uploader": u"Hurts",
             u"uploader": u"Hurts",
             u"title": u"Somebody to Die For",
             u"title": u"Somebody to Die For",
-            u'duration': 230,
+            u"duration": 230,
+            u"width": 1920,
+            u"height": 1080,
         }
         }
-    }
+    }]
+    _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
 
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
-        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
-
-        self.report_extraction(video_id)
-        video_info = json.loads(info_json)['video']
+    def _formats_from_json(self, video_info):
         last_version = {'version': -1}
         last_version = {'version': -1}
         for version in video_info['videoVersions']:
         for version in video_info['videoVersions']:
             # These are the HTTP downloads, other types are for different manifests
             # These are the HTTP downloads, other types are for different manifests
@@ -50,17 +46,75 @@ class VevoIE(InfoExtractor):
         # Already sorted from worst to best quality
         # Already sorted from worst to best quality
         for rend in renditions.findall('rendition'):
         for rend in renditions.findall('rendition'):
             attr = rend.attrib
             attr = rend.attrib
-            f_url = attr['url']
+            format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
             formats.append({
             formats.append({
-                'url': f_url,
-                'ext': determine_ext(f_url),
+                'url': attr['url'],
+                'format_id': attr['name'],
+                'format_note': format_note,
                 'height': int(attr['frameheight']),
                 'height': int(attr['frameheight']),
                 'width': int(attr['frameWidth']),
                 'width': int(attr['frameWidth']),
             })
             })
+        return formats
+
+    def _formats_from_smil(self, smil_xml):
+        formats = []
+        smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
+        els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
+        for el in els:
+            src = el.attrib['src']
+            m = re.match(r'''(?xi)
+                (?P<ext>[a-z0-9]+):
+                (?P<path>
+                    [/a-z0-9]+     # The directory and main part of the URL
+                    _(?P<cbr>[0-9]+)k
+                    _(?P<width>[0-9]+)x(?P<height>[0-9]+)
+                    _(?P<vcodec>[a-z0-9]+)
+                    _(?P<vbr>[0-9]+)
+                    _(?P<acodec>[a-z0-9]+)
+                    _(?P<abr>[0-9]+)
+                    \.[a-z0-9]+  # File extension
+                )''', src)
+            if not m:
+                continue
 
 
-        date_epoch = int(self._search_regex(
-            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
-        upload_date = datetime.datetime.fromtimestamp(date_epoch)
+            format_url = self._SMIL_BASE_URL + m.group('path')
+            formats.append({
+                'url': format_url,
+                'format_id': u'SMIL_' + m.group('cbr'),
+                'vcodec': m.group('vcodec'),
+                'acodec': m.group('acodec'),
+                'vbr': int(m.group('vbr')),
+                'abr': int(m.group('abr')),
+                'ext': m.group('ext'),
+                'width': int(m.group('width')),
+                'height': int(m.group('height')),
+            })
+        return formats
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
+        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
+        video_info = json.loads(info_json)['video']
+
+        formats = self._formats_from_json(video_info)
+        try:
+            smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
+                self._SMIL_BASE_URL, video_id, video_id.lower())
+            smil_xml = self._download_webpage(smil_url, video_id,
+                                              u'Downloading SMIL info')
+            formats.extend(self._formats_from_smil(smil_xml))
+        except ExtractorError as ee:
+            if not isinstance(ee.cause, compat_HTTPError):
+                raise
+            self._downloader.report_warning(
+                u'Cannot download SMIL information, falling back to JSON ..')
+
+        timestamp_ms = int(self._search_regex(
+            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
+        upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
         info = {
         info = {
             'id': video_id,
             'id': video_id,
             'title': video_info['title'],
             'title': video_info['title'],
@@ -71,7 +125,4 @@ class VevoIE(InfoExtractor):
             'duration': video_info['duration'],
             'duration': video_info['duration'],
         }
         }
 
 
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
         return info
         return info

+ 1 - 1
youtube_dl/extractor/viddler.py

@@ -8,7 +8,7 @@ from ..utils import (
 
 
 
 
 class ViddlerIE(InfoExtractor):
 class ViddlerIE(InfoExtractor):
-    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
+    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
     _TEST = {
     _TEST = {
         u"url": u"http://www.viddler.com/v/43903784",
         u"url": u"http://www.viddler.com/v/43903784",
         u'file': u'43903784.mp4',
         u'file': u'43903784.mp4',

+ 8 - 9
youtube_dl/extractor/vimeo.py

@@ -20,14 +20,14 @@ class VimeoIE(InfoExtractor):
     """Information extractor for vimeo.com."""
     """Information extractor for vimeo.com."""
 
 
     # _VALID_URL matches Vimeo URLs
     # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
     _NETRC_MACHINE = 'vimeo'
     _NETRC_MACHINE = 'vimeo'
     IE_NAME = u'vimeo'
     IE_NAME = u'vimeo'
     _TESTS = [
     _TESTS = [
         {
         {
             u'url': u'http://vimeo.com/56015672#at=0',
             u'url': u'http://vimeo.com/56015672#at=0',
             u'file': u'56015672.mp4',
             u'file': u'56015672.mp4',
-            u'md5': u'ae7a1d8b183758a0506b0622f37dfa14',
+            u'md5': u'8879b6cc097e987f02484baf890129e5',
             u'info_dict': {
             u'info_dict': {
                 u"upload_date": u"20121220", 
                 u"upload_date": u"20121220", 
                 u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
                 u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
@@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
         video_id = mobj.group('id')
         video_id = mobj.group('id')
-        if not mobj.group('proto'):
-            url = 'https://' + url
-        elif mobj.group('pro'):
+        if mobj.group('pro') or mobj.group('player'):
             url = 'http://player.vimeo.com/video/' + video_id
             url = 'http://player.vimeo.com/video/' + video_id
-        elif mobj.group('direct_link'):
+        else:
             url = 'https://vimeo.com/' + video_id
             url = 'https://vimeo.com/' + video_id
 
 
         # Retrieve video webpage to extract further information
         # Retrieve video webpage to extract further information
@@ -205,7 +203,7 @@ class VimeoIE(InfoExtractor):
         # Vimeo specific: extract video codec and quality information
         # Vimeo specific: extract video codec and quality information
         # First consider quality, then codecs, then take everything
         # First consider quality, then codecs, then take everything
         codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
         codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
-        files = { 'hd': [], 'sd': [], 'other': []}
+        files = {'hd': [], 'sd': [], 'other': []}
         config_files = config["video"].get("files") or config["request"].get("files")
         config_files = config["video"].get("files") or config["request"].get("files")
         for codec_name, codec_extension in codecs:
         for codec_name, codec_extension in codecs:
             for quality in config_files.get(codec_name, []):
             for quality in config_files.get(codec_name, []):
@@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):
         if len(formats) == 0:
         if len(formats) == 0:
             raise ExtractorError(u'No known codec found')
             raise ExtractorError(u'No known codec found')
 
 
-        return [{
+        return {
             'id':       video_id,
             'id':       video_id,
             'uploader': video_uploader,
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
             'uploader_id': video_uploader_id,
@@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):
             'thumbnail':    video_thumbnail,
             'thumbnail':    video_thumbnail,
             'description':  video_description,
             'description':  video_description,
             'formats': formats,
             'formats': formats,
-        }]
+            'webpage_url': url,
+        }
 
 
 
 
 class VimeoChannelIE(InfoExtractor):
 class VimeoChannelIE(InfoExtractor):

+ 1 - 1
youtube_dl/extractor/vine.py

@@ -27,7 +27,7 @@ class VineIE(InfoExtractor):
         video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
         video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
             webpage, u'video URL')
             webpage, u'video URL')
 
 
-        uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
+        uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
             webpage, u'uploader', fatal=False, flags=re.DOTALL)
             webpage, u'uploader', fatal=False, flags=re.DOTALL)
 
 
         return [{
         return [{

+ 45 - 0
youtube_dl/extractor/vk.py

@@ -0,0 +1,45 @@
+# encoding: utf-8
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+    unescapeHTML,
+)
+
+
+class VKIE(InfoExtractor):
+    IE_NAME = u'vk.com'
+    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
+
+    _TEST = {
+        u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
+        u'md5': u'0deae91935c54e00003c2a00646315f0',
+        u'info_dict': {
+            u'id': u'162222515',
+            u'ext': u'flv',
+            u'title': u'ProtivoGunz - Хуёвая песня',
+            u'uploader': u'Noize MC',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
+        info_page = self._download_webpage(info_url, video_id)
+        m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
+        if m_yt is not None:
+            self.to_screen(u'Youtube video detected')
+            return self.url_result(m_yt.group(1), 'Youtube')
+        vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars')
+        vars = json.loads(vars_json)
+
+        return {
+            'id': compat_str(vars['vid']),
+            'url': vars['url240'],
+            'title': unescapeHTML(vars['md_title']),
+            'thumbnail': vars['jpg'],
+            'uploader': vars['md_author'],
+        }

+ 1 - 0
youtube_dl/extractor/weibo.py

@@ -13,6 +13,7 @@ class WeiboIE(InfoExtractor):
     _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
     _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
 
 
     _TEST = {
     _TEST = {
+        u'add_ie': ['Sina'],
         u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
         u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
         u'file': u'98322879.flv',
         u'file': u'98322879.flv',
         u'info_dict': {
         u'info_dict': {

+ 1 - 1
youtube_dl/extractor/xnxx.py

@@ -9,7 +9,7 @@ from ..utils import (
 
 
 
 
 class XNXXIE(InfoExtractor):
 class XNXXIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
+    _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
     VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
     VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
     VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
     VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
     VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
     VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'

+ 55 - 0
youtube_dl/extractor/xtube.py

@@ -0,0 +1,55 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+class XTubeIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
+    _TEST = {
+        u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_',
+        u'file': u'kVTUy_G222_.mp4',
+        u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab',
+        u'info_dict': {
+            u"title": u"strange erotica",
+            u"description": u"surreal gay themed erotica...almost an ET kind of thing",
+            u"uploader": u"greenshowers",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
+        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
+        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
+        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
+        path = compat_urllib_parse_urlparse(video_url).path
+        extension = os.path.splitext(path)[1][1:]
+        format = path.split('/')[5].split('_')[:2]
+        format[0] += 'p'
+        format[1] += 'k'
+        format = "-".join(format)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'uploader': video_uploader,
+            'description': video_description,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': 18,
+        }

+ 1 - 1
youtube_dl/extractor/yahoo.py

@@ -132,7 +132,7 @@ class YahooSearchIE(SearchInfoExtractor):
                 mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                 mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                 e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
                 e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
                 res['entries'].append(e)
                 res['entries'].append(e)
-            if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1 )):
+            if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1)):
                 break
                 break
 
 
         return res
         return res

+ 3 - 3
youtube_dl/extractor/youku.py

@@ -18,7 +18,7 @@ class YoukuIE(InfoExtractor):
         u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
         u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
         u"file": u"XNDgyMDQ2NTQw_part00.flv",
         u"file": u"XNDgyMDQ2NTQw_part00.flv",
         u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
         u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
-        u"params": { u"test": False },
+        u"params": {u"test": False},
         u"info_dict": {
         u"info_dict": {
             u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
             u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
         }
         }
@@ -37,8 +37,8 @@ class YoukuIE(InfoExtractor):
         source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
         source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
         seed = float(seed)
         seed = float(seed)
         for i in range(len(source)):
         for i in range(len(source)):
-            seed  =  (seed * 211 + 30031 ) % 65536
-            index  =  math.floor(seed / 65536 * len(source) )
+            seed  =  (seed * 211 + 30031) % 65536
+            index  =  math.floor(seed / 65536 * len(source))
             mixed.append(source[int(index)])
             mixed.append(source[int(index)])
             source.remove(source[int(index)])
             source.remove(source[int(index)])
         #return ''.join(mixed)
         #return ''.join(mixed)

+ 4 - 4
youtube_dl/extractor/youporn.py

@@ -81,14 +81,14 @@ class YouPornIE(InfoExtractor):
             # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
             # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
             # A path looks like this:
             # A path looks like this:
             # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
             # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
-            video_url = unescapeHTML( link )
-            path = compat_urllib_parse_urlparse( video_url ).path
-            extension = os.path.splitext( path )[1][1:]
+            video_url = unescapeHTML(link)
+            path = compat_urllib_parse_urlparse(video_url).path
+            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
             format = path.split('/')[4].split('_')[:2]
 
 
             # size = format[0]
             # size = format[0]
             # bitrate = format[1]
             # bitrate = format[1]
-            format = "-".join( format )
+            format = "-".join(format)
             # title = u'%s-%s-%s' % (video_title, size, bitrate)
             # title = u'%s-%s-%s' % (video_title, size, bitrate)
 
 
             formats.append({
             formats.append({

+ 36 - 42
youtube_dl/extractor/youtube.py

@@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
             return False
             return False
 
 
-        galx = None
-        dsh = None
-        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
-        if match:
-          galx = match.group(1)
-        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
-        if match:
-          dsh = match.group(1)
+        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
+                                  login_page, u'Login GALX parameter')
 
 
         # Log in
         # Log in
         login_form_strs = {
         login_form_strs = {
@@ -95,7 +89,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 u'checkConnection': u'',
                 u'checkConnection': u'',
                 u'checkedDomains': u'youtube',
                 u'checkedDomains': u'youtube',
                 u'dnConn': u'',
                 u'dnConn': u'',
-                u'dsh': dsh,
                 u'pstMsg': u'0',
                 u'pstMsg': u'0',
                 u'rmShown': u'1',
                 u'rmShown': u'1',
                 u'secTok': u'',
                 u'secTok': u'',
@@ -346,18 +339,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
             }
             }
         },
         },
-        {
-            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
-            u"file":  u"1ltcDfZMA3U.mp4",
-            u"note": u"Test VEVO video (#897)",
-            u"info_dict": {
-                u"upload_date": u"20070518",
-                u"title": u"Maps - It Will Find You",
-                u"description": u"Music video by Maps performing It Will Find You.",
-                u"uploader": u"MuteUSA",
-                u"uploader_id": u"MuteUSA"
-            }
-        },
         {
         {
             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
             u"file":  u"UxxajLWwzqY.mp4",
             u"file":  u"UxxajLWwzqY.mp4",
@@ -1038,6 +1019,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         """Turn the encrypted s field into a working signature"""
         """Turn the encrypted s field into a working signature"""
 
 
         if player_url is not None:
         if player_url is not None:
+            if player_url.startswith(u'//'):
+                player_url = u'https:' + player_url
             try:
             try:
                 player_id = (player_url, len(s))
                 player_id = (player_url, len(s))
                 if player_id not in self._player_cache:
                 if player_id not in self._player_cache:
@@ -1101,7 +1084,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         else:
         else:
             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
 
 
-    def _get_available_subtitles(self, video_id):
+    def _get_available_subtitles(self, video_id, webpage):
         try:
         try:
             sub_list = self._download_webpage(
             sub_list = self._download_webpage(
                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@ -1117,8 +1100,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             params = compat_urllib_parse.urlencode({
             params = compat_urllib_parse.urlencode({
                 'lang': lang,
                 'lang': lang,
                 'v': video_id,
                 'v': video_id,
-                'fmt': self._downloader.params.get('subtitlesformat'),
-                'name': l[0],
+                'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
+                'name': l[0].encode('utf-8'),
             })
             })
             url = u'http://www.youtube.com/api/timedtext?' + params
             url = u'http://www.youtube.com/api/timedtext?' + params
             sub_lang_list[lang] = url
             sub_lang_list[lang] = url
@@ -1130,7 +1113,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
     def _get_available_automatic_caption(self, video_id, webpage):
     def _get_available_automatic_caption(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
            argument to speed up the process."""
-        sub_format = self._downloader.params.get('subtitlesformat')
+        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
@@ -1318,6 +1301,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             else:
             else:
                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
 
 
+        if 'view_count' in video_info:
+            view_count = int(video_info['view_count'][0])
+        else:
+            view_count = None
+
         # Check for "rental" videos
         # Check for "rental" videos
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
             raise ExtractorError(u'"rental" videos not supported')
             raise ExtractorError(u'"rental" videos not supported')
@@ -1504,7 +1492,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'subtitles':    video_subtitles,
                 'subtitles':    video_subtitles,
                 'duration':     video_duration,
                 'duration':     video_duration,
                 'age_limit':    18 if age_gate else 0,
                 'age_limit':    18 if age_gate else 0,
-                'annotations':  video_annotations
+                'annotations':  video_annotations,
+                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+                'view_count': view_count,
             })
             })
         return results
         return results
 
 
@@ -1590,7 +1580,6 @@ class YoutubePlaylistIE(InfoExtractor):
 class YoutubeChannelIE(InfoExtractor):
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = u'YouTube.com channels'
     IE_DESC = u'YouTube.com channels'
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
-    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     IE_NAME = u'youtube:channel'
     IE_NAME = u'youtube:channel'
@@ -1611,29 +1600,30 @@ class YoutubeChannelIE(InfoExtractor):
         # Download channel page
         # Download channel page
         channel_id = mobj.group(1)
         channel_id = mobj.group(1)
         video_ids = []
         video_ids = []
-        pagenum = 1
-
-        url = self._TEMPLATE_URL % (channel_id, pagenum)
-        page = self._download_webpage(url, channel_id,
-                                      u'Downloading page #%s' % pagenum)
-
-        # Extract video identifiers
-        ids_in_page = self.extract_videos_from_page(page)
-        video_ids.extend(ids_in_page)
+        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
+        channel_page = self._download_webpage(url, channel_id)
+        if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
+            autogenerated = True
+        else:
+            autogenerated = False
 
 
-        # Download any subsequent channel pages using the json-based channel_ajax query
-        if self._MORE_PAGES_INDICATOR in page:
+        if autogenerated:
+            # The videos are contained in a single page
+            # the ajax pages can't be used, they are empty
+            video_ids = self.extract_videos_from_page(channel_page)
+        else:
+            # Download all channel pages using the json-based channel_ajax query
             for pagenum in itertools.count(1):
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
                 page = self._download_webpage(url, channel_id,
                 page = self._download_webpage(url, channel_id,
                                               u'Downloading page #%s' % pagenum)
                                               u'Downloading page #%s' % pagenum)
-
+    
                 page = json.loads(page)
                 page = json.loads(page)
-
+    
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
                 video_ids.extend(ids_in_page)
                 video_ids.extend(ids_in_page)
-
-                if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']:
+    
+                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                     break
                     break
 
 
         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
@@ -1750,6 +1740,10 @@ class YoutubeSearchIE(SearchInfoExtractor):
         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
         return self.playlist_result(videos, query)
         return self.playlist_result(videos, query)
 
 
+class YoutubeSearchDateIE(YoutubeSearchIE):
+    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
+    _SEARCH_KEY = 'ytsearchdate'
+    IE_DESC = u'YouTube.com searches, newest videos first'
 
 
 class YoutubeShowIE(InfoExtractor):
 class YoutubeShowIE(InfoExtractor):
     IE_DESC = u'YouTube.com (multi-season) shows'
     IE_DESC = u'YouTube.com (multi-season) shows'

+ 5 - 1
youtube_dl/update.py

@@ -2,11 +2,15 @@ import io
 import json
 import json
 import traceback
 import traceback
 import hashlib
 import hashlib
+import os
 import subprocess
 import subprocess
 import sys
 import sys
 from zipimport import zipimporter
 from zipimport import zipimporter
 
 
-from .utils import *
+from .utils import (
+    compat_str,
+    compat_urllib_request,
+)
 from .version import __version__
 from .version import __version__
 
 
 def rsa_verify(message, signature, key):
 def rsa_verify(message, signature, key):

+ 1 - 1
youtube_dl/version.py

@@ -1,2 +1,2 @@
 
 
-__version__ = '2013.10.28'
+__version__ = '2013.11.17'