Selaa lähdekoodia

Merge pull request #2 from rg3/master

Update
rzhxeo 12 vuotta sitten
vanhempi
sitoutus
c0de39e6d4
47 muutettua tiedostoa jossa 2122 lisäystä ja 548 poistoa
  1. 1 0
      .gitignore
  2. 6 1
      README.md
  3. 405 0
      devscripts/buildserver.py
  4. 2 2
      devscripts/release.sh
  5. 0 109
      devscripts/youtube_genalgo.py
  6. 1 0
      test/test_all_urls.py
  7. 2 0
      test/test_dailymotion_subtitles.py
  8. 33 1
      test/test_playlists.py
  9. 80 0
      test/test_youtube_signature.py
  10. 2 0
      test/test_youtube_subtitles.py
  11. 36 10
      youtube_dl/FileDownloader.py
  12. 4 1
      youtube_dl/PostProcessor.py
  13. 27 14
      youtube_dl/YoutubeDL.py
  14. 34 12
      youtube_dl/__init__.py
  15. 17 2
      youtube_dl/extractor/__init__.py
  16. 42 70
      youtube_dl/extractor/appletrailers.py
  17. 4 3
      youtube_dl/extractor/archiveorg.py
  18. 27 0
      youtube_dl/extractor/bloomberg.py
  19. 51 13
      youtube_dl/extractor/brightcove.py
  20. 49 13
      youtube_dl/extractor/dailymotion.py
  21. 3 3
      youtube_dl/extractor/dreisat.py
  22. 37 0
      youtube_dl/extractor/ebaumsworld.py
  23. 2 2
      youtube_dl/extractor/facebook.py
  24. 79 0
      youtube_dl/extractor/fktv.py
  25. 51 11
      youtube_dl/extractor/francetv.py
  26. 2 1
      youtube_dl/extractor/funnyordie.py
  27. 0 11
      youtube_dl/extractor/generic.py
  28. 2 1
      youtube_dl/extractor/googleplus.py
  29. 2 2
      youtube_dl/extractor/hotnewhiphop.py
  30. 11 3
      youtube_dl/extractor/livestream.py
  31. 38 84
      youtube_dl/extractor/mixcloud.py
  32. 38 0
      youtube_dl/extractor/newgrounds.py
  33. 8 2
      youtube_dl/extractor/ooyala.py
  34. 16 3
      youtube_dl/extractor/rtlnow.py
  35. 43 2
      youtube_dl/extractor/soundcloud.py
  36. 38 0
      youtube_dl/extractor/southparkstudios.py
  37. 2 3
      youtube_dl/extractor/subtitles.py
  38. 2 2
      youtube_dl/extractor/trilulilu.py
  39. 38 0
      youtube_dl/extractor/vice.py
  40. 15 4
      youtube_dl/extractor/xhamster.py
  41. 65 67
      youtube_dl/extractor/yahoo.py
  42. 7 0
      youtube_dl/extractor/youku.py
  43. 706 50
      youtube_dl/extractor/youtube.py
  44. 42 32
      youtube_dl/extractor/zdf.py
  45. 23 12
      youtube_dl/update.py
  46. 28 1
      youtube_dl/utils.py
  47. 1 1
      youtube_dl/version.py

+ 1 - 0
.gitignore

@@ -24,3 +24,4 @@ updates_key.pem
 *.flv
 *.flv
 *.mp4
 *.mp4
 *.part
 *.part
+test/testdata

+ 6 - 1
README.md

@@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like.
     -U, --update               update this program to latest version. Make sure
     -U, --update               update this program to latest version. Make sure
                                that you have sufficient permissions (run with
                                that you have sufficient permissions (run with
                                sudo if needed)
                                sudo if needed)
-    -i, --ignore-errors        continue on download errors
+    -i, --ignore-errors        continue on download errors, for example to
+                               skip unavailable videos in a playlist
     --dump-user-agent          display the current browser identification
     --dump-user-agent          display the current browser identification
     --user-agent UA            specify a custom user agent
     --user-agent UA            specify a custom user agent
     --referer REF              specify a custom referer, use if the video access
     --referer REF              specify a custom referer, use if the video access
@@ -29,6 +30,10 @@ which means you can modify it, redistribute it or use it however you like.
     --extractor-descriptions   Output descriptions of all supported extractors
     --extractor-descriptions   Output descriptions of all supported extractors
     --proxy URL                Use the specified HTTP/HTTPS proxy
     --proxy URL                Use the specified HTTP/HTTPS proxy
     --no-check-certificate     Suppress HTTPS certificate validation.
     --no-check-certificate     Suppress HTTPS certificate validation.
+    --cache-dir DIR            Location in the filesystem where youtube-dl can
+                               store downloaded information permanently.
+                               ~/.youtube-dl/cache by default
+    --no-cache-dir             Disable filesystem caching
 
 
 ## Video Selection:
 ## Video Selection:
     --playlist-start NUMBER    playlist video to start at (default is 1)
     --playlist-start NUMBER    playlist video to start at (default is 1)

+ 405 - 0
devscripts/buildserver.py

@@ -0,0 +1,405 @@
+#!/usr/bin/python3
+
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from socketserver import ThreadingMixIn
+import argparse
+import ctypes
+import functools
+import sys
+import threading
+import traceback
+import os.path
+
+
+class BuildHTTPServer(ThreadingMixIn, HTTPServer):
+    allow_reuse_address = True
+
+
+advapi32 = ctypes.windll.advapi32
+
+SC_MANAGER_ALL_ACCESS = 0xf003f
+SC_MANAGER_CREATE_SERVICE = 0x02
+SERVICE_WIN32_OWN_PROCESS = 0x10
+SERVICE_AUTO_START = 0x2
+SERVICE_ERROR_NORMAL = 0x1
+DELETE = 0x00010000
+SERVICE_STATUS_START_PENDING = 0x00000002
+SERVICE_STATUS_RUNNING = 0x00000004
+SERVICE_ACCEPT_STOP = 0x1
+
+SVCNAME = 'youtubedl_builder'
+
+LPTSTR = ctypes.c_wchar_p
+START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
+
+
+class SERVICE_TABLE_ENTRY(ctypes.Structure):
+    _fields_ = [
+        ('lpServiceName', LPTSTR),
+        ('lpServiceProc', START_CALLBACK)
+    ]
+
+
+HandlerEx = ctypes.WINFUNCTYPE(
+    ctypes.c_int,     # return
+    ctypes.c_int,     # dwControl
+    ctypes.c_int,     # dwEventType
+    ctypes.c_void_p,  # lpEventData,
+    ctypes.c_void_p,  # lpContext,
+)
+
+
+def _ctypes_array(c_type, py_array):
+    ar = (c_type * len(py_array))()
+    ar[:] = py_array
+    return ar
+
+
+def win_OpenSCManager():
+    res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
+    if not res:
+        raise Exception('Opening service manager failed - '
+                        'are you running this as administrator?')
+    return res
+
+
+def win_install_service(service_name, cmdline):
+    manager = win_OpenSCManager()
+    try:
+        h = advapi32.CreateServiceW(
+            manager, service_name, None,
+            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
+            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
+            cmdline, None, None, None, None, None)
+        if not h:
+            raise OSError('Service creation failed: %s' % ctypes.FormatError())
+
+        advapi32.CloseServiceHandle(h)
+    finally:
+        advapi32.CloseServiceHandle(manager)
+
+
+def win_uninstall_service(service_name):
+    manager = win_OpenSCManager()
+    try:
+        h = advapi32.OpenServiceW(manager, service_name, DELETE)
+        if not h:
+            raise OSError('Could not find service %s: %s' % (
+                service_name, ctypes.FormatError()))
+
+        try:
+            if not advapi32.DeleteService(h):
+                raise OSError('Deletion failed: %s' % ctypes.FormatError())
+        finally:
+            advapi32.CloseServiceHandle(h)
+    finally:
+        advapi32.CloseServiceHandle(manager)
+
+
+def win_service_report_event(service_name, msg, is_error=True):
+    with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
+        f.write(msg + '\n')
+
+    event_log = advapi32.RegisterEventSourceW(None, service_name)
+    if not event_log:
+        raise OSError('Could not report event: %s' % ctypes.FormatError())
+
+    try:
+        type_id = 0x0001 if is_error else 0x0004
+        event_id = 0xc0000000 if is_error else 0x40000000
+        lines = _ctypes_array(LPTSTR, [msg])
+
+        if not advapi32.ReportEventW(
+                event_log, type_id, 0, event_id, None, len(lines), 0,
+                lines, None):
+            raise OSError('Event reporting failed: %s' % ctypes.FormatError())
+    finally:
+        advapi32.DeregisterEventSource(event_log)
+
+
+def win_service_handler(stop_event, *args):
+    try:
+        raise ValueError('Handler called with args ' + repr(args))
+        TODO
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def win_service_set_status(handle, status_code):
+    svcStatus = SERVICE_STATUS()
+    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
+    svcStatus.dwCurrentState = status_code
+    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
+
+    svcStatus.dwServiceSpecificExitCode = 0
+
+    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
+        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
+
+
+def win_service_main(service_name, real_main, argc, argv_raw):
+    try:
+        #args = [argv_raw[i].value for i in range(argc)]
+        stop_event = threading.Event()
+        handler = HandlerEx(functools.partial(stop_event, win_service_handler))
+        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
+        if not h:
+            raise OSError('Handler registration failed: %s' %
+                          ctypes.FormatError())
+
+        TODO
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def win_service_start(service_name, real_main):
+    try:
+        cb = START_CALLBACK(
+            functools.partial(win_service_main, service_name, real_main))
+        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
+            SERVICE_TABLE_ENTRY(
+                service_name,
+                cb
+            ),
+            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
+        ])
+
+        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
+            raise OSError('ctypes start failed: %s' % ctypes.FormatError())
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def main(args=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--install',
+                        action='store_const', dest='action', const='install',
+                        help='Launch at Windows startup')
+    parser.add_argument('-u', '--uninstall',
+                        action='store_const', dest='action', const='uninstall',
+                        help='Remove Windows service')
+    parser.add_argument('-s', '--service',
+                        action='store_const', dest='action', const='service',
+                        help='Run as a Windows service')
+    parser.add_argument('-b', '--bind', metavar='<host:port>',
+                        action='store', default='localhost:8142',
+                        help='Bind to host:port (default %default)')
+    options = parser.parse_args(args=args)
+
+    if options.action == 'install':
+        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
+        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
+        win_install_service(SVCNAME, cmdline)
+        return
+
+    if options.action == 'uninstall':
+        win_uninstall_service(SVCNAME)
+        return
+
+    if options.action == 'service':
+        win_service_start(SVCNAME, main)
+        return
+
+    host, port_str = options.bind.split(':')
+    port = int(port_str)
+
+    print('Listening on %s:%d' % (host, port))
+    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
+    thr = threading.Thread(target=srv.serve_forever)
+    thr.start()
+    input('Press ENTER to shut down')
+    srv.shutdown()
+    thr.join()
+
+
+def rmtree(path):
+    for name in os.listdir(path):
+        fname = os.path.join(path, name)
+        if os.path.isdir(fname):
+            rmtree(fname)
+        else:
+            os.chmod(fname, 0o666)
+            os.remove(fname)
+    os.rmdir(path)
+
+#==============================================================================
+
+class BuildError(Exception):
+    def __init__(self, output, code=500):
+        self.output = output
+        self.code = code
+
+    def __str__(self):
+        return self.output
+
+
+class HTTPError(BuildError):
+    pass
+
+
+class PythonBuilder(object):
+    def __init__(self, **kwargs):
+        pythonVersion = kwargs.pop('python', '2.7')
+        try:
+            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
+            try:
+                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
+            finally:
+                _winreg.CloseKey(key)
+        except Exception:
+            raise BuildError('No such Python version: %s' % pythonVersion)
+
+        super(PythonBuilder, self).__init__(**kwargs)
+
+
+class GITInfoBuilder(object):
+    def __init__(self, **kwargs):
+        try:
+            self.user, self.repoName = kwargs['path'][:2]
+            self.rev = kwargs.pop('rev')
+        except ValueError:
+            raise BuildError('Invalid path')
+        except KeyError as e:
+            raise BuildError('Missing mandatory parameter "%s"' % e.args[0])
+
+        path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
+        if not os.path.exists(path):
+            os.makedirs(path)
+        self.basePath = tempfile.mkdtemp(dir=path)
+        self.buildPath = os.path.join(self.basePath, 'build')
+
+        super(GITInfoBuilder, self).__init__(**kwargs)
+
+
+class GITBuilder(GITInfoBuilder):
+    def build(self):
+        try:
+            subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
+            subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
+        except subprocess.CalledProcessError as e:
+            raise BuildError(e.output)
+
+        super(GITBuilder, self).build()
+
+
+class YoutubeDLBuilder(object):
+    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
+
+    def __init__(self, **kwargs):
+        if self.repoName != 'youtube-dl':
+            raise BuildError('Invalid repository "%s"' % self.repoName)
+        if self.user not in self.authorizedUsers:
+            raise HTTPError('Unauthorized user "%s"' % self.user, 401)
+
+        super(YoutubeDLBuilder, self).__init__(**kwargs)
+
+    def build(self):
+        try:
+            subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
+                                    cwd=self.buildPath)
+        except subprocess.CalledProcessError as e:
+            raise BuildError(e.output)
+
+        super(YoutubeDLBuilder, self).build()
+
+
+class DownloadBuilder(object):
+    def __init__(self, **kwargs):
+        self.handler = kwargs.pop('handler')
+        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
+        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
+        if not self.srcPath.startswith(self.buildPath):
+            raise HTTPError(self.srcPath, 401)
+
+        super(DownloadBuilder, self).__init__(**kwargs)
+
+    def build(self):
+        if not os.path.exists(self.srcPath):
+            raise HTTPError('No such file', 404)
+        if os.path.isdir(self.srcPath):
+            raise HTTPError('Is a directory: %s' % self.srcPath, 401)
+
+        self.handler.send_response(200)
+        self.handler.send_header('Content-Type', 'application/octet-stream')
+        self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
+        self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
+        self.handler.end_headers()
+
+        with open(self.srcPath, 'rb') as src:
+            shutil.copyfileobj(src, self.handler.wfile)
+
+        super(DownloadBuilder, self).build()
+
+
+class CleanupTempDir(object):
+    def build(self):
+        try:
+            rmtree(self.basePath)
+        except Exception as e:
+            print('WARNING deleting "%s": %s' % (self.basePath, e))
+
+        super(CleanupTempDir, self).build()
+
+
+class Null(object):
+    def __init__(self, **kwargs):
+        pass
+
+    def start(self):
+        pass
+
+    def close(self):
+        pass
+
+    def build(self):
+        pass
+
+
+class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
+    pass
+
+
+class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
+    actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching.
+
+    def do_GET(self):
+        path = urlparse.urlparse(self.path)
+        paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
+        action, _, path = path.path.strip('/').partition('/')
+        if path:
+            path = path.split('/')
+            if action in self.actionDict:
+                try:
+                    builder = self.actionDict[action](path=path, handler=self, **paramDict)
+                    builder.start()
+                    try:
+                        builder.build()
+                    finally:
+                        builder.close()
+                except BuildError as e:
+                    self.send_response(e.code)
+                    msg = unicode(e).encode('UTF-8')
+                    self.send_header('Content-Type', 'text/plain; charset=UTF-8')
+                    self.send_header('Content-Length', len(msg))
+                    self.end_headers()
+                    self.wfile.write(msg)
+                except HTTPError as e:
+                    self.send_response(e.code, str(e))
+            else:
+                self.send_response(500, 'Unknown build method "%s"' % action)
+        else:
+            self.send_response(500, 'Malformed URL')
+
+#==============================================================================
+
+if __name__ == '__main__':
+    main()

+ 2 - 2
devscripts/release.sh

@@ -55,8 +55,8 @@ git push origin "$version"
 /bin/echo -e "\n### OK, now it is time to build the binaries..."
 /bin/echo -e "\n### OK, now it is time to build the binaries..."
 REV=$(git rev-parse HEAD)
 REV=$(git rev-parse HEAD)
 make youtube-dl youtube-dl.tar.gz
 make youtube-dl youtube-dl.tar.gz
-wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
-	wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+read -p "VM running? (y/n) " -n 1
+wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
 mkdir -p "build/$version"
 mkdir -p "build/$version"
 mv youtube-dl youtube-dl.exe "build/$version"
 mv youtube-dl youtube-dl.exe "build/$version"
 mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
 mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"

+ 0 - 109
devscripts/youtube_genalgo.py

@@ -1,109 +0,0 @@
-#!/usr/bin/env python
-
-# Generate youtube signature algorithm from test cases
-
-import sys
-
-tests = [
-    # 92 - vflQw-fB4 2013/07/17
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
-     "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
-    # 90
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
-     "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
-    # 89 
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
-     "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
-    # 88 - vflapUV9V 2013/08/28
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
-     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
-    # 87
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
-     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
-    # 86 - vfluy6kdb 2013/09/06
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
-     "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
-    # 85 - vflkuzxcs 2013/09/11
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"),
-    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
-     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
-    # 83
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
-     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
-    # 82 - vflGNjMhJ 2013/09/12
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
-     ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
-    # 81 - vflLC8JvQ 2013/07/25
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
-     "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
-    # 80 - vflZK4ZYR 2013/08/23 (sporadic)
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
-     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
-    # 79 - vflLC8JvQ 2013/07/25 (sporadic)
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
-     "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
-]
-
-tests_age_gate = [
-    # 86 - vflqinMWD
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
-     "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
-]
-
-def find_matching(wrong, right):
-    idxs = [wrong.index(c) for c in right]
-    return compress(idxs)
-    return ('s[%d]' % i for i in idxs)
-
-def compress(idxs):
-    def _genslice(start, end, step):
-        starts = '' if start == 0 else str(start)
-        ends = ':%d' % (end+step)
-        steps = '' if step == 1 else (':%d' % step)
-        return 's[%s%s%s]' % (starts, ends, steps)
-
-    step = None
-    for i, prev in zip(idxs[1:], idxs[:-1]):
-        if step is not None:
-            if i - prev == step:
-                continue
-            yield _genslice(start, prev, step)
-            step = None
-            continue
-        if i - prev in [-1, 1]:
-            step = i - prev
-            start = prev
-            continue
-        else:
-            yield 's[%d]' % prev
-    if step is None:
-        yield 's[%d]' % i
-    else:
-        yield _genslice(start, i, step)
-
-def _assert_compress(inp, exp):
-    res = list(compress(inp))
-    if res != exp:
-        print('Got %r, expected %r' % (res, exp))
-        assert res == exp
-_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]'])
-_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]'])
-_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]'])
-
-def gen(wrong, right, indent):
-    code = ' + '.join(find_matching(wrong, right))
-    return 'if len(s) == %d:\n%s    return %s\n' % (len(wrong), indent, code)
-
-def genall(tests):
-    indent = ' ' * 8
-    return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests)
-
-def main():
-    print(genall(tests))
-    print(u'    Age gate:')
-    print(genall(tests_age_gate))
-
-if __name__ == '__main__':
-    main()

+ 1 - 0
test/test_all_urls.py

@@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
         self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
         self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
 
 
     def test_youtube_channel_matching(self):
     def test_youtube_channel_matching(self):
         assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
         assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])

+ 2 - 0
test/test_dailymotion_subtitles.py

@@ -40,6 +40,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
         subtitles = self.getSubtitles()
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
     def test_allsubtitles(self):
     def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 5)
         self.assertEqual(len(subtitles.keys()), 5)
@@ -54,6 +55,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
         self.assertTrue(len(subtitles.keys()) == 0)
         self.assertTrue(len(subtitles.keys()) == 0)
     def test_nosubtitles(self):
     def test_nosubtitles(self):
         self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
         self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+        self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles), 0)
         self.assertEqual(len(subtitles), 0)

+ 33 - 1
test/test_playlists.py

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
+# encoding: utf-8
 
 
 import sys
 import sys
 import unittest
 import unittest
@@ -8,7 +9,14 @@ import json
 import os
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE
+from youtube_dl.extractor import (
+    DailymotionPlaylistIE,
+    DailymotionUserIE,
+    VimeoChannelIE,
+    UstreamChannelIE,
+    SoundcloudUserIE,
+    LivestreamIE,
+)
 from youtube_dl.utils import *
 from youtube_dl.utils import *
 
 
 from helper import FakeYDL
 from helper import FakeYDL
@@ -26,6 +34,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'SPORT')
         self.assertEqual(result['title'], u'SPORT')
         self.assertTrue(len(result['entries']) > 20)
         self.assertTrue(len(result['entries']) > 20)
 
 
+    def test_dailymotion_user(self):
+        dl = FakeYDL()
+        ie = DailymotionUserIE(dl)
+        result = ie.extract('http://www.dailymotion.com/user/generation-quoi/')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'Génération Quoi')
+        self.assertTrue(len(result['entries']) >= 26)
+
     def test_vimeo_channel(self):
     def test_vimeo_channel(self):
         dl = FakeYDL()
         dl = FakeYDL()
         ie = VimeoChannelIE(dl)
         ie = VimeoChannelIE(dl)
@@ -42,5 +58,21 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], u'5124905')
         self.assertEqual(result['id'], u'5124905')
         self.assertTrue(len(result['entries']) >= 11)
         self.assertTrue(len(result['entries']) >= 11)
 
 
+    def test_soundcloud_user(self):
+        dl = FakeYDL()
+        ie = SoundcloudUserIE(dl)
+        result = ie.extract('https://soundcloud.com/the-concept-band')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'9615865')
+        self.assertTrue(len(result['entries']) >= 12)
+
+    def test_livestream_event(self):
+        dl = FakeYDL()
+        ie = LivestreamIE(dl)
+        result = ie.extract('http://new.livestream.com/tedx/cityenglish')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'TEDCity2.0 (English)')
+        self.assertTrue(len(result['entries']) >= 4)
+
 if __name__ == '__main__':
 if __name__ == '__main__':
     unittest.main()
     unittest.main()

+ 80 - 0
test/test_youtube_signature.py

@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+import io
+import re
+import string
+import sys
+import unittest
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.extractor import YoutubeIE
+from youtube_dl.utils import compat_str, compat_urlretrieve
+
+_TESTS = [
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
+        u'js',
+        86,
+        u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
+    ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
+        u'js',
+        85,
+        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
+    ),
+    (
+        u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
+        u'swf',
+        82,
+        u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
+    ),
+]
+
+
+class TestSignature(unittest.TestCase):
+    def setUp(self):
+        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+        if not os.path.exists(self.TESTDATA_DIR):
+            os.mkdir(self.TESTDATA_DIR)
+
+
+def make_tfunc(url, stype, sig_length, expected_sig):
+    basename = url.rpartition('/')[2]
+    m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
+    assert m, '%r should follow URL format' % basename
+    test_id = m.group(1)
+
+    def test_func(self):
+        fn = os.path.join(self.TESTDATA_DIR, basename)
+
+        if not os.path.exists(fn):
+            compat_urlretrieve(url, fn)
+
+        ie = YoutubeIE()
+        if stype == 'js':
+            with io.open(fn, encoding='utf-8') as testf:
+                jscode = testf.read()
+            func = ie._parse_sig_js(jscode)
+        else:
+            assert stype == 'swf'
+            with open(fn, 'rb') as testf:
+                swfcode = testf.read()
+            func = ie._parse_sig_swf(swfcode)
+        src_sig = compat_str(string.printable[:sig_length])
+        got_sig = func(src_sig)
+        self.assertEqual(got_sig, expected_sig)
+
+    test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
+    setattr(TestSignature, test_func.__name__, test_func)
+
+for test_spec in _TESTS:
+    make_tfunc(*test_spec)
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 2 - 0
test/test_youtube_subtitles.py

@@ -41,6 +41,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         subtitles = self.getSubtitles()
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
     def test_youtube_allsubtitles(self):
     def test_youtube_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
         self.assertEqual(len(subtitles.keys()), 13)
@@ -66,6 +67,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         self.assertTrue(subtitles['it'] is not None)
         self.assertTrue(subtitles['it'] is not None)
     def test_youtube_nosubtitles(self):
     def test_youtube_nosubtitles(self):
         self.url = 'sAjKT8FhjI8'
         self.url = 'sAjKT8FhjI8'
+        self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles), 0)
         self.assertEqual(len(subtitles), 0)

+ 36 - 10
youtube_dl/FileDownloader.py

@@ -77,26 +77,43 @@ class FileDownloader(object):
     @staticmethod
     @staticmethod
     def calc_percent(byte_counter, data_len):
     def calc_percent(byte_counter, data_len):
         if data_len is None:
         if data_len is None:
+            return None
+        return float(byte_counter) / float(data_len) * 100.0
+
+    @staticmethod
+    def format_percent(percent):
+        if percent is None:
             return '---.-%'
             return '---.-%'
-        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
+        return '%6s' % ('%3.1f%%' % percent)
 
 
     @staticmethod
     @staticmethod
     def calc_eta(start, now, total, current):
     def calc_eta(start, now, total, current):
         if total is None:
         if total is None:
-            return '--:--'
+            return None
         dif = now - start
         dif = now - start
         if current == 0 or dif < 0.001: # One millisecond
         if current == 0 or dif < 0.001: # One millisecond
-            return '--:--'
+            return None
         rate = float(current) / dif
         rate = float(current) / dif
-        eta = int((float(total) - float(current)) / rate)
+        return int((float(total) - float(current)) / rate)
+
+    @staticmethod
+    def format_eta(eta):
+        if eta is None:
+            return '--:--'
         return FileDownloader.format_seconds(eta)
         return FileDownloader.format_seconds(eta)
 
 
     @staticmethod
     @staticmethod
     def calc_speed(start, now, bytes):
     def calc_speed(start, now, bytes):
         dif = now - start
         dif = now - start
         if bytes == 0 or dif < 0.001: # One millisecond
         if bytes == 0 or dif < 0.001: # One millisecond
+            return None
+        return float(bytes) / dif
+
+    @staticmethod
+    def format_speed(speed):
+        if speed is None:
             return '%10s' % '---b/s'
             return '%10s' % '---b/s'
-        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
+        return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
 
 
     @staticmethod
     @staticmethod
     def best_block_size(elapsed_time, bytes):
     def best_block_size(elapsed_time, bytes):
@@ -205,11 +222,14 @@ class FileDownloader(object):
         """Report destination filename."""
         """Report destination filename."""
         self.to_screen(u'[download] Destination: ' + filename)
         self.to_screen(u'[download] Destination: ' + filename)
 
 
-    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
+    def report_progress(self, percent, data_len_str, speed, eta):
         """Report download progress."""
         """Report download progress."""
         if self.params.get('noprogress', False):
         if self.params.get('noprogress', False):
             return
             return
         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+        eta_str = self.format_eta(eta)
+        percent_str = self.format_percent(percent)
+        speed_str = self.format_speed(speed)
         if self.params.get('progress_with_newline', False):
         if self.params.get('progress_with_newline', False):
             self.to_screen(u'[download] %s of %s at %s ETA %s' %
             self.to_screen(u'[download] %s of %s at %s ETA %s' %
                 (percent_str, data_len_str, speed_str, eta_str))
                 (percent_str, data_len_str, speed_str, eta_str))
@@ -378,6 +398,7 @@ class FileDownloader(object):
             self._hook_progress({
             self._hook_progress({
                 'filename': filename,
                 'filename': filename,
                 'status': 'finished',
                 'status': 'finished',
+                'total_bytes': os.path.getsize(encodeFilename(filename)),
             })
             })
             return True
             return True
 
 
@@ -524,13 +545,14 @@ class FileDownloader(object):
                 block_size = self.best_block_size(after - before, len(data_block))
                 block_size = self.best_block_size(after - before, len(data_block))
 
 
             # Progress message
             # Progress message
-            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
+            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
             if data_len is None:
             if data_len is None:
                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
+                eta = None
             else:
             else:
-                percent_str = self.calc_percent(byte_counter, data_len)
-                eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
-                self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+                percent = self.calc_percent(byte_counter, data_len)
+                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+                self.report_progress(percent, data_len_str, speed, eta)
 
 
             self._hook_progress({
             self._hook_progress({
                 'downloaded_bytes': byte_counter,
                 'downloaded_bytes': byte_counter,
@@ -538,6 +560,8 @@ class FileDownloader(object):
                 'tmpfilename': tmpfilename,
                 'tmpfilename': tmpfilename,
                 'filename': filename,
                 'filename': filename,
                 'status': 'downloading',
                 'status': 'downloading',
+                'eta': eta,
+                'speed': speed,
             })
             })
 
 
             # Apply rate limit
             # Apply rate limit
@@ -580,6 +604,8 @@ class FileDownloader(object):
         * downloaded_bytes: Bytes on disks
         * downloaded_bytes: Bytes on disks
         * total_bytes: Total bytes, None if unknown
         * total_bytes: Total bytes, None if unknown
         * tmpfilename: The filename we're currently writing to
         * tmpfilename: The filename we're currently writing to
+        * eta: The estimated time in seconds, None if unknown
+        * speed: The download speed in bytes/second, None if unknown
 
 
         Hooks are guaranteed to be called at least once (with status "finished")
         Hooks are guaranteed to be called at least once (with status "finished")
         if the download is successful.
         if the download is successful.

+ 4 - 1
youtube_dl/PostProcessor.py

@@ -444,8 +444,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         if information['ext'] != u'mp4':
         if information['ext'] != u'mp4':
             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
             return True, information
             return True, information
-        sub_langs = [key for key in information['subtitles']]
+        if not information.get('subtitles'):
+            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed') 
+            return True, information
 
 
+        sub_langs = [key for key in information['subtitles']]
         filename = information['filepath']
         filename = information['filepath']
         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
 
 

+ 27 - 14
youtube_dl/YoutubeDL.py

@@ -74,12 +74,15 @@ class YoutubeDL(object):
     writesubtitles:    Write the video subtitles to a file
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatic subtitles to a file
     writeautomaticsub: Write the automatic subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
     allsubtitles:      Downloads all the subtitles of the video
+                       (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
     subtitleslangs:    List of languages of the subtitles to download
     subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
     daterange:         A DateRange object, download only if the upload_date is in the range.
     skip_download:     Skip the actual download of the video file
     skip_download:     Skip the actual download of the video file
+    cachedir:          Location of the cache files in the filesystem.
+                       None to disable filesystem cache.
     
     
     The following parameters are not used by YoutubeDL itself, they are used by
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
     the FileDownloader:
@@ -103,6 +106,17 @@ class YoutubeDL(object):
         self._download_retcode = 0
         self._download_retcode = 0
         self._num_downloads = 0
         self._num_downloads = 0
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+
+        if (sys.version_info >= (3,) and sys.platform != 'win32' and
+                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+                and not params['restrictfilenames']):
+            # On Python 3, the Unicode filesystem API will throw errors (#1474)
+            self.report_warning(
+                u'Assuming --restrict-filenames since file system encoding '
+                u'cannot encode all characters. '
+                u'Set the LC_ALL environment variable to fix this.')
+            params['restrictfilenames'] = True
+
         self.params = params
         self.params = params
         self.fd = FileDownloader(self, self.params)
         self.fd = FileDownloader(self, self.params)
 
 
@@ -141,14 +155,10 @@ class YoutubeDL(object):
 
 
     def to_screen(self, message, skip_eol=False):
     def to_screen(self, message, skip_eol=False):
         """Print message to stdout if not in quiet mode."""
         """Print message to stdout if not in quiet mode."""
-        assert type(message) == type(u'')
         if not self.params.get('quiet', False):
         if not self.params.get('quiet', False):
             terminator = [u'\n', u''][skip_eol]
             terminator = [u'\n', u''][skip_eol]
             output = message + terminator
             output = message + terminator
-            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-                output = output.encode(preferredencoding(), 'ignore')
-            self._screen_file.write(output)
-            self._screen_file.flush()
+            write_string(output, self._screen_file)
 
 
     def to_stderr(self, message):
     def to_stderr(self, message):
         """Print message to stderr."""
         """Print message to stderr."""
@@ -499,8 +509,7 @@ class YoutubeDL(object):
                 return
                 return
 
 
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
-                                       self.params.get('writeautomaticsub'),
-                                       self.params.get('allsubtitles', False)])
+                                       self.params.get('writeautomaticsub')])
 
 
         if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
         if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
             # subtitles download errors are already managed as troubles in relevant IE
             # subtitles download errors are already managed as troubles in relevant IE
@@ -536,11 +545,15 @@ class YoutubeDL(object):
                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                                (info_dict['extractor'], info_dict['id']))
                                (info_dict['extractor'], info_dict['id']))
-                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
-                with open(thumb_filename, 'wb') as thumbf:
-                    shutil.copyfileobj(uf, thumbf)
-                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
-                               (info_dict['extractor'], info_dict['id'], thumb_filename))
+                try:
+                    uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+                    with open(thumb_filename, 'wb') as thumbf:
+                        shutil.copyfileobj(uf, thumbf)
+                    self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
+                        (info_dict['extractor'], info_dict['id'], thumb_filename))
+                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                    self.report_warning(u'Unable to download thumbnail "%s": %s' %
+                        (info_dict['thumbnail'], compat_str(err)))
 
 
         if not self.params.get('skip_download', False):
         if not self.params.get('skip_download', False):
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
@@ -548,11 +561,11 @@ class YoutubeDL(object):
             else:
             else:
                 try:
                 try:
                     success = self.fd._do_download(filename, info_dict)
                     success = self.fd._do_download(filename, info_dict)
-                except (OSError, IOError) as err:
-                    raise UnavailableVideoError(err)
                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                     self.report_error(u'unable to download video data: %s' % str(err))
                     self.report_error(u'unable to download video data: %s' % str(err))
                     return
                     return
+                except (OSError, IOError) as err:
+                    raise UnavailableVideoError(err)
                 except (ContentTooShortError, ) as err:
                 except (ContentTooShortError, ) as err:
                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                     return
                     return

+ 34 - 12
youtube_dl/__init__.py

@@ -30,6 +30,7 @@ __authors__  = (
     'Pierre Rudloff',
     'Pierre Rudloff',
     'Huarong Huo',
     'Huarong Huo',
     'Ismael Mejía',
     'Ismael Mejía',
+    'Steffan \'Ruirize\' James',
 )
 )
 
 
 __license__ = 'Public Domain'
 __license__ = 'Public Domain'
@@ -149,7 +150,7 @@ def parseOpts(overrideArguments=None):
     general.add_option('-U', '--update',
     general.add_option('-U', '--update',
             action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
             action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
     general.add_option('-i', '--ignore-errors',
     general.add_option('-i', '--ignore-errors',
-            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
     general.add_option('--dump-user-agent',
     general.add_option('--dump-user-agent',
             action='store_true', dest='dump_user_agent',
             action='store_true', dest='dump_user_agent',
             help='display the current browser identification', default=False)
             help='display the current browser identification', default=False)
@@ -166,6 +167,12 @@ def parseOpts(overrideArguments=None):
             help='Output descriptions of all supported extractors', default=False)
             help='Output descriptions of all supported extractors', default=False)
     general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
     general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
+    general.add_option(
+        '--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache',
+        help='Location in the filesystem where youtube-dl can store downloaded information permanently. %default by default')
+    general.add_option(
+        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
+        help='Disable filesystem caching')
 
 
 
 
     selection.add_option('--playlist-start',
     selection.add_option('--playlist-start',
@@ -271,6 +278,10 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option('--dump-intermediate-pages',
     verbosity.add_option('--dump-intermediate-pages',
             action='store_true', dest='dump_intermediate_pages', default=False,
             action='store_true', dest='dump_intermediate_pages', default=False,
             help='print downloaded pages to debug problems(very verbose)')
             help='print downloaded pages to debug problems(very verbose)')
+    verbosity.add_option('--youtube-print-sig-code',
+            action='store_true', dest='youtube_print_sig_code', default=False,
+            help=optparse.SUPPRESS_HELP)
+
 
 
     filesystem.add_option('-t', '--title',
     filesystem.add_option('-t', '--title',
             action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
             action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
@@ -354,7 +365,7 @@ def parseOpts(overrideArguments=None):
     if overrideArguments is not None:
     if overrideArguments is not None:
         opts, args = parser.parse_args(overrideArguments)
         opts, args = parser.parse_args(overrideArguments)
         if opts.verbose:
         if opts.verbose:
-            sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
     else:
     else:
         xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
         xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
         if xdg_config_home:
         if xdg_config_home:
@@ -367,9 +378,9 @@ def parseOpts(overrideArguments=None):
         argv = systemConf + userConf + commandLineConf
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         opts, args = parser.parse_args(argv)
         if opts.verbose:
         if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
 
 
     return parser, opts, args
     return parser, opts, args
 
 
@@ -392,7 +403,7 @@ def _real_main(argv=None):
         except (IOError, OSError) as err:
         except (IOError, OSError) as err:
             if opts.verbose:
             if opts.verbose:
                 traceback.print_exc()
                 traceback.print_exc()
-            sys.stderr.write(u'ERROR: unable to open cookie file\n')
+            write_string(u'ERROR: unable to open cookie file\n')
             sys.exit(101)
             sys.exit(101)
     # Set user agent
     # Set user agent
     if opts.user_agent is not None:
     if opts.user_agent is not None:
@@ -419,7 +430,7 @@ def _real_main(argv=None):
             batchurls = [x.strip() for x in batchurls]
             batchurls = [x.strip() for x in batchurls]
             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
             if opts.verbose:
             if opts.verbose:
-                sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
         except IOError:
         except IOError:
             sys.exit(u'ERROR: batch file could not be read')
             sys.exit(u'ERROR: batch file could not be read')
     all_urls = batchurls + args
     all_urls = batchurls + args
@@ -533,6 +544,11 @@ def _real_main(argv=None):
     else:
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
         date = DateRange(opts.dateafter, opts.datebefore)
 
 
+    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+    # this was the old behaviour if only --all-sub was given.
+    if opts.allsubtitles and (opts.writeautomaticsub == False):
+        opts.writesubtitles = True
+
     if sys.version_info < (3,):
     if sys.version_info < (3,):
         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
         if opts.outtmpl is not None:
         if opts.outtmpl is not None:
@@ -545,6 +561,10 @@ def _real_main(argv=None):
             or (opts.useid and u'%(id)s.%(ext)s')
             or (opts.useid and u'%(id)s.%(ext)s')
             or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
             or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
             or u'%(title)s-%(id)s.%(ext)s')
             or u'%(title)s-%(id)s.%(ext)s')
+    if '%(ext)s' not in outtmpl and opts.extractaudio:
+        parser.error(u'Cannot download a video and extract audio into the same'
+                     u' file! Use "%%(ext)s" instead of %r' %
+                     determine_ext(outtmpl, u''))
 
 
     # YoutubeDL
     # YoutubeDL
     ydl = YoutubeDL({
     ydl = YoutubeDL({
@@ -603,10 +623,12 @@ def _real_main(argv=None):
         'min_filesize': opts.min_filesize,
         'min_filesize': opts.min_filesize,
         'max_filesize': opts.max_filesize,
         'max_filesize': opts.max_filesize,
         'daterange': date,
         'daterange': date,
+        'cachedir': opts.cachedir,
+        'youtube_print_sig_code': opts.youtube_print_sig_code,
         })
         })
 
 
     if opts.verbose:
     if opts.verbose:
-        sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
+        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
         try:
         try:
             sp = subprocess.Popen(
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],
                 ['git', 'rev-parse', '--short', 'HEAD'],
@@ -615,14 +637,14 @@ def _real_main(argv=None):
             out, err = sp.communicate()
             out, err = sp.communicate()
             out = out.decode().strip()
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
             if re.match('[0-9a-f]+', out):
-                sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
+                write_string(u'[debug] Git HEAD: ' + out + u'\n')
         except:
         except:
             try:
             try:
                 sys.exc_clear()
                 sys.exc_clear()
             except:
             except:
                 pass
                 pass
-        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
-        sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+        write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
+        write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
 
 
     ydl.add_default_info_extractors()
     ydl.add_default_info_extractors()
 
 
@@ -636,7 +658,7 @@ def _real_main(argv=None):
 
 
     # Update version
     # Update version
     if opts.update_self:
     if opts.update_self:
-        update_self(ydl.to_screen, opts.verbose, sys.argv[0])
+        update_self(ydl.to_screen, opts.verbose)
 
 
     # Maybe do nothing
     # Maybe do nothing
     if len(all_urls) < 1:
     if len(all_urls) < 1:

+ 17 - 2
youtube_dl/extractor/__init__.py

@@ -6,6 +6,7 @@ from .arte import ArteTvIE
 from .auengine import AUEngineIE
 from .auengine import AUEngineIE
 from .bandcamp import BandcampIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bliptv import BlipTVIE, BlipTVUserIE
+from .bloomberg import BloombergIE
 from .breakcom import BreakIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .brightcove import BrightcoveIE
 from .c56 import C56IE
 from .c56 import C56IE
@@ -17,21 +18,32 @@ from .comedycentral import ComedyCentralIE
 from .condenast import CondeNastIE
 from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
 from .cspan import CSpanIE
-from .dailymotion import DailymotionIE, DailymotionPlaylistIE
+from .dailymotion import (
+    DailymotionIE,
+    DailymotionPlaylistIE,
+    DailymotionUserIE,
+)
 from .daum import DaumIE
 from .daum import DaumIE
 from .depositfiles import DepositFilesIE
 from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
 from .defense import DefenseGouvFrIE
+from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .exfm import ExfmIE
 from .facebook import FacebookIE
 from .facebook import FacebookIE
+from .fktv import (
+    FKTVIE,
+    FKTVPosteckeIE,
+)
 from .flickr import FlickrIE
 from .flickr import FlickrIE
 from .francetv import (
 from .francetv import (
     PluzzIE,
     PluzzIE,
     FranceTvInfoIE,
     FranceTvInfoIE,
+    France2IE,
+    GenerationQuoiIE
 )
 )
 from .freesound import FreesoundIE
 from .freesound import FreesoundIE
 from .funnyordie import FunnyOrDieIE
 from .funnyordie import FunnyOrDieIE
@@ -67,6 +79,7 @@ from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .nbc import NBCNewsIE
+from .newgrounds import NewgroundsIE
 from .ooyala import OoyalaIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .orf import ORFIE
 from .pbs import PBSIE
 from .pbs import PBSIE
@@ -82,7 +95,8 @@ from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .slideshare import SlideshareIE
 from .sohu import SohuIE
 from .sohu import SohuIE
-from .soundcloud import SoundcloudIE, SoundcloudSetIE
+from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
+from .southparkstudios import SouthParkStudiosIE
 from .spiegel import SpiegelIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
 from .statigram import StatigramIE
@@ -102,6 +116,7 @@ from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vevo import VevoIE
+from .vice import ViceIE
 from .videofyme import VideofyMeIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .vine import VineIE

+ 42 - 70
youtube_dl/extractor/appletrailers.py

@@ -1,8 +1,10 @@
 import re
 import re
 import xml.etree.ElementTree
 import xml.etree.ElementTree
+import json
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
+    compat_urlparse,
     determine_ext,
     determine_ext,
 )
 )
 
 
@@ -14,10 +16,9 @@ class AppleTrailersIE(InfoExtractor):
         u"playlist": [
         u"playlist": [
             {
             {
                 u"file": u"manofsteel-trailer4.mov",
                 u"file": u"manofsteel-trailer4.mov",
-                u"md5": u"11874af099d480cc09e103b189805d5f",
+                u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8",
                 u"info_dict": {
                 u"info_dict": {
                     u"duration": 111,
                     u"duration": 111,
-                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
                     u"title": u"Trailer 4",
                     u"title": u"Trailer 4",
                     u"upload_date": u"20130523",
                     u"upload_date": u"20130523",
                     u"uploader_id": u"wb",
                     u"uploader_id": u"wb",
@@ -25,10 +26,9 @@ class AppleTrailersIE(InfoExtractor):
             },
             },
             {
             {
                 u"file": u"manofsteel-trailer3.mov",
                 u"file": u"manofsteel-trailer3.mov",
-                u"md5": u"07a0a262aae5afe68120eed61137ab34",
+                u"md5": u"b8017b7131b721fb4e8d6f49e1df908c",
                 u"info_dict": {
                 u"info_dict": {
                     u"duration": 182,
                     u"duration": 182,
-                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
                     u"title": u"Trailer 3",
                     u"title": u"Trailer 3",
                     u"upload_date": u"20130417",
                     u"upload_date": u"20130417",
                     u"uploader_id": u"wb",
                     u"uploader_id": u"wb",
@@ -36,10 +36,9 @@ class AppleTrailersIE(InfoExtractor):
             },
             },
             {
             {
                 u"file": u"manofsteel-trailer.mov",
                 u"file": u"manofsteel-trailer.mov",
-                u"md5": u"e401fde0813008e3307e54b6f384cff1",
+                u"md5": u"d0f1e1150989b9924679b441f3404d48",
                 u"info_dict": {
                 u"info_dict": {
                     u"duration": 148,
                     u"duration": 148,
-                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
                     u"title": u"Trailer",
                     u"title": u"Trailer",
                     u"upload_date": u"20121212",
                     u"upload_date": u"20121212",
                     u"uploader_id": u"wb",
                     u"uploader_id": u"wb",
@@ -47,10 +46,9 @@ class AppleTrailersIE(InfoExtractor):
             },
             },
             {
             {
                 u"file": u"manofsteel-teaser.mov",
                 u"file": u"manofsteel-teaser.mov",
-                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+                u"md5": u"5fe08795b943eb2e757fa95cb6def1cb",
                 u"info_dict": {
                 u"info_dict": {
                     u"duration": 93,
                     u"duration": 93,
-                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
                     u"title": u"Teaser",
                     u"title": u"Teaser",
                     u"upload_date": u"20120721",
                     u"upload_date": u"20120721",
                     u"uploader_id": u"wb",
                     u"uploader_id": u"wb",
@@ -59,87 +57,61 @@ class AppleTrailersIE(InfoExtractor):
         ]
         ]
     }
     }
 
 
+    _JSON_RE = r'iTunes.playURL\((.*?)\);'
+
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
         movie = mobj.group('movie')
         movie = mobj.group('movie')
         uploader_id = mobj.group('company')
         uploader_id = mobj.group('company')
 
 
-        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+        playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
         playlist_snippet = self._download_webpage(playlist_url, movie)
         playlist_snippet = self._download_webpage(playlist_url, movie)
-        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
+        playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
+        playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
+        # The ' in the onClick attributes are not escaped, it couldn't be parsed
+        # with xml.etree.ElementTree.fromstring
+        # like: http://trailers.apple.com/trailers/wb/gravity/
+        def _clean_json(m):
+            return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+        playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
         playlist_html = u'<html>' + playlist_cleaned + u'</html>'
         playlist_html = u'<html>' + playlist_cleaned + u'</html>'
 
 
-        size_cache = {}
-
         doc = xml.etree.ElementTree.fromstring(playlist_html)
         doc = xml.etree.ElementTree.fromstring(playlist_html)
         playlist = []
         playlist = []
         for li in doc.findall('./div/ul/li'):
         for li in doc.findall('./div/ul/li'):
-            title = li.find('.//h3').text
+            on_click = li.find('.//a').attrib['onClick']
+            trailer_info_json = self._search_regex(self._JSON_RE,
+                on_click, u'trailer info')
+            trailer_info = json.loads(trailer_info_json)
+            title = trailer_info['title']
             video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
             video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
             thumbnail = li.find('.//img').attrib['src']
             thumbnail = li.find('.//img').attrib['src']
+            upload_date = trailer_info['posted'].replace('-', '')
 
 
-            date_el = li.find('.//p')
-            upload_date = None
-            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
-            if m:
-                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
-            runtime_el = date_el.find('./br')
-            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+            runtime = trailer_info['runtime']
+            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
             duration = None
             duration = None
             if m:
             if m:
                 duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
                 duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
 
 
-            formats = []
-            for formats_el in li.findall('.//a'):
-                if formats_el.attrib['class'] != 'OverlayPanel':
-                    continue
-                target = formats_el.attrib['target']
-
-                format_code = formats_el.text
-                if 'Automatic' in format_code:
-                    continue
+            first_url = trailer_info['url']
+            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
+            settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
+            settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
+            settings = json.loads(settings_json)
 
 
-                size_q = formats_el.attrib['href']
-                size_id = size_q.rpartition('#videos-')[2]
-                if size_id not in size_cache:
-                    size_url = url + size_q
-                    sizepage_html = self._download_webpage(
-                        size_url, movie,
-                        note=u'Downloading size info %s' % size_id,
-                        errnote=u'Error while downloading size info %s' % size_id,
-                    )
-                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
-                    size_cache[size_id] = _doc
-
-                sizepage_doc = size_cache[size_id]
-                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
-                for vid_a in links:
-                    href = vid_a.get('href')
-                    if not href.endswith(target):
-                        continue
-                    detail_q = href.partition('#')[0]
-                    detail_url = url + '/' + detail_q
-
-                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
-                    detail_id = m.group('detail_id')
-
-                    detail_html = self._download_webpage(
-                        detail_url, movie,
-                        note=u'Downloading detail %s %s' % (detail_id, size_id),
-                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
-                    )
-                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
-                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
-                    assert movie_link_el.get('class') == 'movieLink'
-                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
-                    ext = determine_ext(movie_link)
-                    assert ext == 'mov'
-
-                    formats.append({
-                        'format': format_code,
-                        'ext': ext,
-                        'url': movie_link,
-                    })
+            formats = []
+            for format in settings['metadata']['sizes']:
+                # The src is a file pointing to the real video file
+                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
+                formats.append({
+                    'url': format_url,
+                    'ext': determine_ext(format_url),
+                    'format': format['type'],
+                    'width': format['width'],
+                    'height': int(format['height']),
+                })
+            formats = sorted(formats, key=lambda f: (f['height'], f['width']))
 
 
             info = {
             info = {
                 '_type': 'video',
                 '_type': 'video',

+ 4 - 3
youtube_dl/extractor/archiveorg.py

@@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor):
             for fn,fdata in data['files'].items()
             for fn,fdata in data['files'].items()
             if 'Video' in fdata['format']]
             if 'Video' in fdata['format']]
         formats.sort(key=lambda fdata: fdata['file_size'])
         formats.sort(key=lambda fdata: fdata['file_size'])
+        for f in formats:
+            f['ext'] = determine_ext(f['url'])
 
 
         info = {
         info = {
             '_type': 'video',
             '_type': 'video',
@@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor):
             info['thumbnail'] = thumbnail
             info['thumbnail'] = thumbnail
 
 
         # TODO: Remove when #980 has been merged
         # TODO: Remove when #980 has been merged
-        info['url'] = formats[-1]['url']
-        info['ext'] = determine_ext(formats[-1]['url'])
+        info.update(formats[-1])
 
 
-        return info
+        return info

+ 27 - 0
youtube_dl/extractor/bloomberg.py

@@ -0,0 +1,27 @@
+import re
+
+from .common import InfoExtractor
+
+
+class BloombergIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'
+
+    _TEST = {
+        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
+        u'info_dict': {
+            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
+            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
+        },
+        u'params': {
+            # Requires ffmpeg (m3u8 manifest)
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        webpage = self._download_webpage(url, name)
+        ooyala_url = self._og_search_video_url(webpage)
+        return self.url_result(ooyala_url, ie='Ooyala')

+ 51 - 13
youtube_dl/extractor/brightcove.py

@@ -1,3 +1,5 @@
+# encoding: utf-8
+
 import re
 import re
 import json
 import json
 import xml.etree.ElementTree
 import xml.etree.ElementTree
@@ -7,15 +9,39 @@ from ..utils import (
     compat_urllib_parse,
     compat_urllib_parse,
     find_xpath_attr,
     find_xpath_attr,
     compat_urlparse,
     compat_urlparse,
+
+    ExtractorError,
 )
 )
 
 
 class BrightcoveIE(InfoExtractor):
 class BrightcoveIE(InfoExtractor):
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
     _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
     _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
-    
-    # There is a test for Brigtcove in GenericIE, that way we test both the download
-    # and the detection of videos, and we don't have to find an URL that is always valid
+
+    _TESTS = [
+        {
+            # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
+            u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
+            u'file': u'2371591881001.mp4',
+            u'md5': u'9e80619e0a94663f0bdc849b4566af19',
+            u'note': u'Test Brightcove downloads and detection in GenericIE',
+            u'info_dict': {
+                u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
+                u'uploader': u'8TV',
+                u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
+            }
+        },
+        {
+            # From http://medianetwork.oracle.com/video/player/1785452137001
+            u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
+            u'file': u'1785452137001.flv',
+            u'info_dict': {
+                u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
+                u'description': u'John Rose speaks at the JVM Language Summit, August 1, 2012.',
+                u'uploader': u'Oracle',
+            },
+        },
+    ]
 
 
     @classmethod
     @classmethod
     def _build_brighcove_url(cls, object_str):
     def _build_brighcove_url(cls, object_str):
@@ -72,15 +98,27 @@ class BrightcoveIE(InfoExtractor):
                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
 
 
     def _extract_video_info(self, video_info):
     def _extract_video_info(self, video_info):
-        renditions = video_info['renditions']
-        renditions = sorted(renditions, key=lambda r: r['size'])
-        best_format = renditions[-1]
+        info = {
+            'id': video_info['id'],
+            'title': video_info['displayName'],
+            'description': video_info.get('shortDescription'),
+            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
+            'uploader': video_info.get('publisherName'),
+        }
 
 
-        return {'id': video_info['id'],
-                'title': video_info['displayName'],
-                'url': best_format['defaultURL'], 
+        renditions = video_info.get('renditions')
+        if renditions:
+            renditions = sorted(renditions, key=lambda r: r['size'])
+            best_format = renditions[-1]
+            info.update({
+                'url': best_format['defaultURL'],
                 'ext': 'mp4',
                 'ext': 'mp4',
-                'description': video_info.get('shortDescription'),
-                'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
-                'uploader': video_info.get('publisherName'),
-                }
+            })
+        elif video_info.get('FLVFullLengthURL') is not None:
+            info.update({
+                'url': video_info['FLVFullLengthURL'],
+                'ext': 'flv',
+            })
+        else:
+            raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
+        return info

+ 49 - 13
youtube_dl/extractor/dailymotion.py

@@ -14,8 +14,15 @@ from ..utils import (
     ExtractorError,
     ExtractorError,
 )
 )
 
 
+class DailymotionBaseInfoExtractor(InfoExtractor):
+    @staticmethod
+    def _build_request(url):
+        """Build a request with the family filter disabled"""
+        request = compat_urllib_request.Request(url)
+        request.add_header('Cookie', 'family_filter=off')
+        return request
 
 
-class DailymotionIE(SubtitlesInfoExtractor):
+class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
     """Information Extractor for Dailymotion"""
     """Information Extractor for Dailymotion"""
 
 
     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
@@ -40,8 +47,7 @@ class DailymotionIE(SubtitlesInfoExtractor):
         url = 'http://www.dailymotion.com/video/%s' % video_id
         url = 'http://www.dailymotion.com/video/%s' % video_id
 
 
         # Retrieve video webpage to extract further information
         # Retrieve video webpage to extract further information
-        request = compat_urllib_request.Request(url)
-        request.add_header('Cookie', 'family_filter=off')
+        request = self._build_request(url)
         webpage = self._download_webpage(request, video_id)
         webpage = self._download_webpage(request, video_id)
 
 
         # Extract URL, uploader and title from webpage
         # Extract URL, uploader and title from webpage
@@ -63,6 +69,9 @@ class DailymotionIE(SubtitlesInfoExtractor):
         info = self._search_regex(r'var info = ({.*?}),$', embed_page,
         info = self._search_regex(r'var info = ({.*?}),$', embed_page,
             'video info', flags=re.MULTILINE)
             'video info', flags=re.MULTILINE)
         info = json.loads(info)
         info = json.loads(info)
+        if info.get('error') is not None:
+            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
+            raise ExtractorError(msg, expected=True)
 
 
         # TODO: support choosing qualities
         # TODO: support choosing qualities
 
 
@@ -110,29 +119,56 @@ class DailymotionIE(SubtitlesInfoExtractor):
         return {}
         return {}
 
 
 
 
-class DailymotionPlaylistIE(InfoExtractor):
+class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
+    IE_NAME = u'dailymotion:playlist'
     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
     _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
     _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
+    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
 
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id =  mobj.group('id')
+    def _extract_entries(self, id):
         video_ids = []
         video_ids = []
-
         for pagenum in itertools.count(1):
         for pagenum in itertools.count(1):
-            webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
-                                             playlist_id, u'Downloading page %s' % pagenum)
+            request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
+            webpage = self._download_webpage(request,
+                                             id, u'Downloading page %s' % pagenum)
 
 
             playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
             playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
             video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
             video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
 
 
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                 break
                 break
-
-        entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
+        return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
                    for video_id in video_ids]
                    for video_id in video_ids]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        webpage = self._download_webpage(url, playlist_id)
+
         return {'_type': 'playlist',
         return {'_type': 'playlist',
                 'id': playlist_id,
                 'id': playlist_id,
                 'title': get_element_by_id(u'playlist_name', webpage),
                 'title': get_element_by_id(u'playlist_name', webpage),
-                'entries': entries,
+                'entries': self._extract_entries(playlist_id),
                 }
                 }
+
+
+class DailymotionUserIE(DailymotionPlaylistIE):
+    IE_NAME = u'dailymotion:user'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
+    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
+    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user = mobj.group('user')
+        webpage = self._download_webpage(url, user)
+        full_user = self._html_search_regex(
+            r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user),
+            webpage, u'user', flags=re.DOTALL)
+
+        return {
+            '_type': 'playlist',
+            'id': user,
+            'title': full_user,
+            'entries': self._extract_entries(user),
+        }

+ 3 - 3
youtube_dl/extractor/dreisat.py

@@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor):
             'width': int(fe.find('./width').text),
             'width': int(fe.find('./width').text),
             'height': int(fe.find('./height').text),
             'height': int(fe.find('./height').text),
             'url': fe.find('./url').text,
             'url': fe.find('./url').text,
+            'ext': determine_ext(fe.find('./url').text),
             'filesize': int(fe.find('./filesize').text),
             'filesize': int(fe.find('./filesize').text),
             'video_bitrate': int(fe.find('./videoBitrate').text),
             'video_bitrate': int(fe.find('./videoBitrate').text),
             '3sat_qualityname': fe.find('./quality').text,
             '3sat_qualityname': fe.find('./quality').text,
@@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor):
         }
         }
 
 
         # TODO: Remove when #980 has been merged
         # TODO: Remove when #980 has been merged
-        info['url'] = formats[-1]['url']
-        info['ext'] = determine_ext(formats[-1]['url'])
+        info.update(formats[-1])
 
 
-        return info
+        return info

+ 37 - 0
youtube_dl/extractor/ebaumsworld.py

@@ -0,0 +1,37 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class EbaumsWorldIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
+        u'file': u'83367677.mp4',
+        u'info_dict': {
+            u'title': u'A Giant Python Opens The Door',
+            u'description': u'This is how nightmares start...',
+            u'uploader': u'jihadpizza',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        config_xml = self._download_webpage(
+            'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
+        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
+        video_url = config.find('file').text
+
+        return {
+            'id': video_id,
+            'title': config.find('title').text,
+            'url': video_url,
+            'ext': determine_ext(video_url),
+            'description': config.find('description').text,
+            'thumbnail': config.find('image').text,
+            'uploader': config.find('username').text,
+        }

+ 2 - 2
youtube_dl/extractor/facebook.py

@@ -106,8 +106,8 @@ class FacebookIE(InfoExtractor):
         video_duration = int(video_data['video_duration'])
         video_duration = int(video_data['video_duration'])
         thumbnail = video_data['thumbnail_src']
         thumbnail = video_data['thumbnail_src']
 
 
-        video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
-            webpage, u'title')
+        video_title = self._html_search_regex(
+            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title')
 
 
         info = {
         info = {
             'id': video_id,
             'id': video_id,

+ 79 - 0
youtube_dl/extractor/fktv.py

@@ -0,0 +1,79 @@
+import re
+import random
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    get_element_by_id,
+    clean_html,
+)
+
+
+class FKTVIE(InfoExtractor):
+    IE_NAME = u'fernsehkritik.tv'
+    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
+
+    _TEST = {
+        u'url': u'http://fernsehkritik.tv/folge-1',
+        u'file': u'00011.flv',
+        u'info_dict': {
+            u'title': u'Folge 1 vom 10. April 2007',
+            u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        episode = int(mobj.group('ep'))
+
+        server = random.randint(2, 4)
+        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
+        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
+            episode)
+        playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
+            u'playlist', flags=re.DOTALL)
+        files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
+        # TODO: return a single multipart video
+        videos = []
+        for i, _ in enumerate(files, 1):
+            video_id = '%04d%d' % (episode, i)
+            video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
+            video_title = 'Fernsehkritik %d.%d' % (episode, i)
+            videos.append({
+                'id': video_id,
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'title': clean_html(get_element_by_id('eptitle', start_webpage)),
+                'description': clean_html(get_element_by_id('contentlist', start_webpage)),
+                'thumbnail': video_thumbnail
+            })
+        return videos
+
+
+class FKTVPosteckeIE(InfoExtractor):
+    IE_NAME = u'fernsehkritik.tv:postecke'
+    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
+    _TEST = {
+        u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
+        u'file': u'0120.flv',
+        u'md5': u'262f0adbac80317412f7e57b4808e5c4',
+        u'info_dict': {
+            u"title": u"Postecke 120"
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        episode = int(mobj.group('ep'))
+
+        server = random.randint(2, 4)
+        video_id = '%04d' % episode
+        video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
+        video_title = 'Postecke %d' % episode
+        return {
+            'id':       video_id,
+            'url':      video_url,
+            'ext':      determine_ext(video_url),
+            'title':    video_title,
+        }

+ 51 - 11
youtube_dl/extractor/francetv.py

@@ -1,6 +1,7 @@
 # encoding: utf-8
 # encoding: utf-8
 import re
 import re
 import xml.etree.ElementTree
 import xml.etree.ElementTree
+import json
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
@@ -34,17 +35,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
     IE_NAME = u'pluzz.francetv.fr'
     IE_NAME = u'pluzz.francetv.fr'
     _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
     _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
 
 
-    _TEST = {
-        u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
-        u'file': u'88439064.mp4',
-        u'info_dict': {
-            u'title': u'Allô Rufo',
-            u'description': u'md5:d909f1ebdf963814b65772aea250400e',
-        },
-        u'params': {
-            u'skip_download': True,
-        },
-    }
+    # Can't use tests, videos expire in 7 days
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         title = re.match(self._VALID_URL, url).group(1)
         title = re.match(self._VALID_URL, url).group(1)
@@ -75,3 +66,52 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
         webpage = self._download_webpage(url, page_title)
         webpage = self._download_webpage(url, page_title)
         video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
         video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
         return self._extract_video(video_id)
         return self._extract_video(video_id)
+
+
+class France2IE(FranceTVBaseInfoExtractor):
+    IE_NAME = u'france2.fr'
+    _VALID_URL = r'https?://www\.france2\.fr/emissions/.*?/videos/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
+        u'file': u'75540104.mp4',
+        u'info_dict': {
+            u'title': u'13h15, le samedi...',
+            u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+        },
+        u'params': {
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        return self._extract_video(video_id)
+
+
+class GenerationQuoiIE(InfoExtractor):
+    IE_NAME = u'http://generation-quoi.france2.fr'
+    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
+
+    _TEST = {
+        u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
+        u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
+        u'info_dict': {
+            u'title': u'Génération Quoi - Garde à Vous',
+            u'uploader': u'Génération Quoi',
+        },
+        u'params': {
+            # It uses Dailymotion
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
+        info_json = self._download_webpage(info_url, name)
+        info = json.loads(info_json)
+        return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
+            ie='Dailymotion')

+ 2 - 1
youtube_dl/extractor/funnyordie.py

@@ -21,7 +21,8 @@ class FunnyOrDieIE(InfoExtractor):
         video_id = mobj.group('id')
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
 
 
-        video_url = self._search_regex(r'type="video/mp4" src="(.*?)"',
+        video_url = self._search_regex(
+            [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
             webpage, u'video URL', flags=re.DOTALL)
             webpage, u'video URL', flags=re.DOTALL)
 
 
         info = {
         info = {

+ 0 - 11
youtube_dl/extractor/generic.py

@@ -29,17 +29,6 @@ class GenericIE(InfoExtractor):
                 u"title": u"R\u00e9gis plante sa Jeep"
                 u"title": u"R\u00e9gis plante sa Jeep"
             }
             }
         },
         },
-        {
-            u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/',
-            u'file': u'2371591881001.mp4',
-            u'md5': u'9e80619e0a94663f0bdc849b4566af19',
-            u'note': u'Test Brightcove downloads and detection in GenericIE',
-            u'info_dict': {
-                u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
-                u'uploader': u'8TV',
-                u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
-            }
-        },
     ]
     ]
 
 
     def report_download_webpage(self, video_id):
     def report_download_webpage(self, video_id):

+ 2 - 1
youtube_dl/extractor/googleplus.py

@@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor):
         self.report_extraction(video_id)
         self.report_extraction(video_id)
 
 
         # Extract update date
         # Extract update date
-        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
+        upload_date = self._html_search_regex(
+            ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'],
             webpage, u'upload date', fatal=False)
             webpage, u'upload date', fatal=False)
         if upload_date:
         if upload_date:
             # Convert timestring to a format suitable for filename
             # Convert timestring to a format suitable for filename

+ 2 - 2
youtube_dl/extractor/hotnewhiphop.py

@@ -7,11 +7,11 @@ from .common import InfoExtractor
 class HotNewHipHopIE(InfoExtractor):
 class HotNewHipHopIE(InfoExtractor):
     _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'
     _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'
     _TEST = {
     _TEST = {
-        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'",
+        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
         u'file': u'1435540.mp3',
         u'file': u'1435540.mp3',
         u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
         u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
         u'info_dict': {
         u'info_dict': {
-            u"title": u"Freddie Gibbs Songs - Lay It Down"
+            u"title": u"Freddie Gibbs - Lay It Down"
         }
         }
     }
     }
 
 

+ 11 - 3
youtube_dl/extractor/livestream.py

@@ -2,7 +2,12 @@ import re
 import json
 import json
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
-from ..utils import compat_urllib_parse_urlparse, compat_urlparse
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urlparse,
+    get_meta_content,
+    ExtractorError,
+)
 
 
 
 
 class LivestreamIE(InfoExtractor):
 class LivestreamIE(InfoExtractor):
@@ -35,8 +40,11 @@ class LivestreamIE(InfoExtractor):
 
 
         if video_id is None:
         if video_id is None:
             # This is an event page:
             # This is an event page:
-            api_url = self._search_regex(r'event_design_eventId: \'(.+?)\'',
-                                         webpage, 'api url')
+            player = get_meta_content('twitter:player', webpage)
+            if player is None:
+                raise ExtractorError('Couldn\'t extract event api url')
+            api_url = player.replace('/player', '')
+            api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
             info = json.loads(self._download_webpage(api_url, event_name,
             info = json.loads(self._download_webpage(api_url, event_name,
                                                      u'Downloading event info'))
                                                      u'Downloading event info'))
             videos = [self._extract_video_info(video_data['data'])
             videos = [self._extract_video_info(video_data['data'])

+ 38 - 84
youtube_dl/extractor/mixcloud.py

@@ -5,34 +5,27 @@ import socket
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     compat_http_client,
     compat_http_client,
-    compat_str,
     compat_urllib_error,
     compat_urllib_error,
     compat_urllib_request,
     compat_urllib_request,
-
-    ExtractorError,
+    unified_strdate,
 )
 )
 
 
 
 
 class MixcloudIE(InfoExtractor):
 class MixcloudIE(InfoExtractor):
-    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
     IE_NAME = u'mixcloud'
     IE_NAME = u'mixcloud'
 
 
-    def report_download_json(self, file_id):
-        """Report JSON download."""
-        self.to_screen(u'Downloading json')
-
-    def get_urls(self, jsonData, fmt, bitrate='best'):
-        """Get urls from 'audio_formats' section in json"""
-        try:
-            bitrate_list = jsonData[fmt]
-            if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
-                bitrate = max(bitrate_list) # select highest
-
-            url_list = jsonData[fmt][bitrate]
-        except TypeError: # we have no bitrate info.
-            url_list = jsonData[fmt]
-        return url_list
+    _TEST = {
+        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
+        u'file': u'dholbach-cryptkeeper.mp3',
+        u'info_dict': {
+            u'title': u'Cryptkeeper',
+            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+            u'uploader': u'Daniel Holbach',
+            u'uploader_id': u'dholbach',
+            u'upload_date': u'20111115',
+        },
+    }
 
 
     def check_urls(self, url_list):
     def check_urls(self, url_list):
         """Returns 1st active url from list"""
         """Returns 1st active url from list"""
@@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor):
 
 
         return None
         return None
 
 
-    def _print_formats(self, formats):
-        print('Available formats:')
-        for fmt in formats.keys():
-            for b in formats[fmt]:
-                try:
-                    ext = formats[fmt][b][0]
-                    print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
-                except TypeError: # we have no bitrate info
-                    ext = formats[fmt][0]
-                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
-                    break
-
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        # extract uploader & filename from url
-        uploader = mobj.group(1).decode('utf-8')
-        file_id = uploader + "-" + mobj.group(2).decode('utf-8')
-
-        # construct API request
-        file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
-        # retrieve .json file with links to files
-        request = compat_urllib_request.Request(file_url)
-        try:
-            self.report_download_json(file_url)
-            jsonData = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))
-
-        # parse JSON
-        json_data = json.loads(jsonData)
-        player_url = json_data['player_swf_url']
-        formats = dict(json_data['audio_formats'])
-
-        req_format = self._downloader.params.get('format', None)
-
-        if self._downloader.params.get('listformats', None):
-            self._print_formats(formats)
-            return
-
-        if req_format is None or req_format == 'best':
-            for format_param in formats.keys():
-                url_list = self.get_urls(formats, format_param)
-                # check urls
-                file_url = self.check_urls(url_list)
-                if file_url is not None:
-                    break # got it!
-        else:
-            if req_format not in formats:
-                raise ExtractorError(u'Format is not available')
-
-            url_list = self.get_urls(formats, req_format)
-            file_url = self.check_urls(url_list)
-            format_param = req_format
 
 
-        return [{
-            'id': file_id.decode('utf-8'),
-            'url': file_url.decode('utf-8'),
-            'uploader': uploader.decode('utf-8'),
-            'upload_date': None,
-            'title': json_data['name'],
-            'ext': file_url.split('.')[-1].decode('utf-8'),
-            'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
-            'thumbnail': json_data['thumbnail_url'],
-            'description': json_data['description'],
-            'player_url': player_url.decode('utf-8'),
-        }]
+        uploader = mobj.group(1)
+        cloudcast_name = mobj.group(2)
+        track_id = '-'.join((uploader, cloudcast_name))
+        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+        webpage = self._download_webpage(url, track_id)
+        json_data = self._download_webpage(api_url, track_id,
+            u'Downloading cloudcast info')
+        info = json.loads(json_data)
+
+        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
+        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
+        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
+        final_song_url = self.check_urls(template_url % i for i in range(30))
+
+        return {
+            'id': track_id,
+            'title': info['name'],
+            'url': final_song_url,
+            'ext': 'mp3',
+            'description': info['description'],
+            'thumbnail': info['pictures'].get('extra_large'),
+            'uploader': info['user']['name'],
+            'uploader_id': info['user']['username'],
+            'upload_date': unified_strdate(info['created_time']),
+            'view_count': info['play_count'],
+        }

+ 38 - 0
youtube_dl/extractor/newgrounds.py

@@ -0,0 +1,38 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
class NewgroundsIE(InfoExtractor):
    """Extractor for audio tracks on newgrounds.com.

    Title, uploader and the media URL are scraped from JSON fragments that
    the listen page embeds inline in its HTML.
    """
    _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://www.newgrounds.com/audio/listen/549479',
        u'file': u'549479.mp3',
        u'md5': u'fe6033d297591288fa1c1f780386f07a',
        u'info_dict': {
            u"title": u"B7 - BusMode",
            u"uploader": u"Burn7",
        }
    }

    def _real_extract(self, url):
        music_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, music_id)

        title = self._html_search_regex(
            r',"name":"([^"]+)",', webpage, u'music title')
        uploader = self._html_search_regex(
            r',"artist":"([^"]+)",', webpage, u'music uploader')

        # The page contains a fragment like {"url":"..."},... — capture the
        # opening object and re-close it so it parses as standalone JSON.
        url_fragment = self._html_search_regex(
            r'({"url":"[^"]+"),', webpage, u'music url') + '}'
        music_url = json.loads(url_fragment)['url']

        return {
            'id':       music_id,
            'title':    title,
            'url':      music_url,
            'uploader': uploader,
            'ext':      determine_ext(music_url),
        }

+ 8 - 2
youtube_dl/extractor/ooyala.py

@@ -18,11 +18,15 @@ class OoyalaIE(InfoExtractor):
         },
         },
     }
     }
 
 
+    @staticmethod
+    def _url_for_embed_code(embed_code):
+        return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+
     def _extract_result(self, info, more_info):
     def _extract_result(self, info, more_info):
         return {'id': info['embedCode'],
         return {'id': info['embedCode'],
                 'ext': 'mp4',
                 'ext': 'mp4',
                 'title': unescapeHTML(info['title']),
                 'title': unescapeHTML(info['title']),
-                'url': info['url'],
+                'url': info.get('ipad_url') or info['url'],
                 'description': unescapeHTML(more_info['description']),
                 'description': unescapeHTML(more_info['description']),
                 'thumbnail': more_info['promo'],
                 'thumbnail': more_info['promo'],
                 }
                 }
@@ -35,7 +39,9 @@ class OoyalaIE(InfoExtractor):
         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                         player, u'mobile player url')
                                         player, u'mobile player url')
         mobile_player = self._download_webpage(mobile_url, embedCode)
         mobile_player = self._download_webpage(mobile_url, embedCode)
-        videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
+        videos_info = self._search_regex(
+            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
+            mobile_player, u'info').replace('\\"','"')
         videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
         videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
         videos_info = json.loads(videos_info)
         videos_info = json.loads(videos_info)
         videos_more_info =json.loads(videos_more_info)
         videos_more_info =json.loads(videos_more_info)

+ 16 - 3
youtube_dl/extractor/rtlnow.py

@@ -8,8 +8,8 @@ from ..utils import (
 )
 )
 
 
 class RTLnowIE(InfoExtractor):
 class RTLnowIE(InfoExtractor):
-    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW and VOX NOW"""
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
     _TESTS = [{
     _TESTS = [{
         u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
         u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
         u'file': u'90419.flv',
         u'file': u'90419.flv',
@@ -61,6 +61,19 @@ class RTLnowIE(InfoExtractor):
         u'params': {
         u'params': {
             u'skip_download': True,
             u'skip_download': True,
         },
         },
+    },
+    {
+        u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1',
+        u'file': u'127367.flv',
+        u'info_dict': {
+            u'upload_date': u'20130926', 
+            u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...',
+            u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin',
+            u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg',
+        },
+        u'params': {
+            u'skip_download': True,
+        },
     }]
     }]
 
 
     def _real_extract(self,url):
     def _real_extract(self,url):
@@ -79,7 +92,7 @@ class RTLnowIE(InfoExtractor):
             msg = clean_html(note_m.group(1))
             msg = clean_html(note_m.group(1))
             raise ExtractorError(msg)
             raise ExtractorError(msg)
 
 
-        video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
+        video_title = self._html_search_regex(r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>',
             webpage, u'title')
             webpage, u'title')
         playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
         playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
             webpage, u'playerdata_url')
             webpage, u'playerdata_url')

+ 43 - 2
youtube_dl/extractor/soundcloud.py

@@ -1,10 +1,12 @@
 import json
 import json
 import re
 import re
+import itertools
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     compat_str,
     compat_str,
     compat_urlparse,
     compat_urlparse,
+    compat_urllib_parse,
 
 
     ExtractorError,
     ExtractorError,
     unified_strdate,
     unified_strdate,
@@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor):
     def _resolv_url(cls, url):
     def _resolv_url(cls, url):
         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
 
 
-    def _extract_info_dict(self, info, full_title=None):
+    def _extract_info_dict(self, info, full_title=None, quiet=False):
         video_id = info['id']
         video_id = info['id']
         name = full_title or video_id
         name = full_title or video_id
-        self.report_extraction(name)
+        if quiet == False:
+            self.report_extraction(name)
 
 
         thumbnail = info['artwork_url']
         thumbnail = info['artwork_url']
         if thumbnail is not None:
         if thumbnail is not None:
@@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE):
                 'id': info['id'],
                 'id': info['id'],
                 'title': info['title'],
                 'title': info['title'],
                 }
                 }
+
+
class SoundcloudUserIE(SoundcloudIE):
    """Extractor for a SoundCloud user page: returns all of the user's
    tracks as a single playlist.

    Fix: the domain dot in _VALID_URL is now escaped — the previous bare
    '.' matched any character (e.g. "soundcloudXcom"). The page size used
    for API pagination is also named instead of being a magic number.
    """
    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
    IE_NAME = u'soundcloud:user'

    # The tracks API returns at most this many entries per request.
    _PAGE_SIZE = 50

    # it's in tests/test_playlists.py
    _TEST = None

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        uploader = mobj.group('user')

        # Resolve the profile URL to the numeric user id via the API.
        url = 'http://soundcloud.com/%s/' % uploader
        resolv_url = self._resolv_url(url)
        user_json = self._download_webpage(resolv_url, uploader,
            u'Downloading user info')
        user = json.loads(user_json)

        tracks = []
        for i in itertools.count():
            data = compat_urllib_parse.urlencode({
                'offset': i * self._PAGE_SIZE,
                'client_id': self._CLIENT_ID,
            })
            tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
            response = self._download_webpage(tracks_url, uploader,
                u'Downloading tracks page %s' % (i + 1))
            new_tracks = json.loads(response)
            tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
            if len(new_tracks) < self._PAGE_SIZE:
                # A short page means this was the last one.
                break

        return {
            '_type': 'playlist',
            'id': compat_str(user['id']),
            'title': user['username'],
            'entries': tracks,
        }

+ 38 - 0
youtube_dl/extractor/southparkstudios.py

@@ -0,0 +1,38 @@
+import re
+
+from .mtv import MTVIE, _media_xml_tag
+
+
class SouthParkStudiosIE(MTVIE):
    """Extractor for southparkstudios.com clips and full episodes.

    Reuses the MTV feed machinery from the parent class; only the feed URL,
    the mgid lookup and the thumbnail handling differ from MTVIE.
    """
    IE_NAME = u'southparkstudios.com'
    _VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'

    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'

    _TEST = {
        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
        u'info_dict': {
            u'title': u'Bat Daded',
            u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
        },
    }

    # Overwrite MTVIE properties we don't want
    _TESTS = []

    def _get_thumbnail_url(self, uri, itemdoc):
        # Thumbnails here live under media:group/media:thumbnail; they may
        # be absent, in which case we report no thumbnail.
        path = '/'.join((_media_xml_tag('group'), _media_xml_tag('thumbnail')))
        node = itemdoc.find(path)
        return None if node is None else node.attrib['url']

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)
        # The player is bootstrapped via swfobject with the mgid as argument.
        mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
                                  webpage, u'mgid')
        return self._get_videos_info(mgid)

+ 2 - 3
youtube_dl/extractor/subtitles.py

@@ -10,8 +10,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
     @property
     @property
     def _have_to_download_any_subtitles(self):
     def _have_to_download_any_subtitles(self):
         return any([self._downloader.params.get('writesubtitles', False),
         return any([self._downloader.params.get('writesubtitles', False),
-                    self._downloader.params.get('writeautomaticsub'),
-                    self._downloader.params.get('allsubtitles', False)])
+                    self._downloader.params.get('writeautomaticsub')])
 
 
     def _list_available_subtitles(self, video_id, webpage=None):
     def _list_available_subtitles(self, video_id, webpage=None):
         """ outputs the available subtitles for the video """
         """ outputs the available subtitles for the video """
@@ -34,7 +33,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
         available_subs_list = {}
         available_subs_list = {}
         if self._downloader.params.get('writeautomaticsub', False):
         if self._downloader.params.get('writeautomaticsub', False):
             available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
             available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
-        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+        if self._downloader.params.get('writesubtitles', False):
             available_subs_list.update(self._get_available_subtitles(video_id))
             available_subs_list.update(self._get_available_subtitles(video_id))
 
 
         if not available_subs_list:  # error, it didn't get the available subtitles
         if not available_subs_list:  # error, it didn't get the available subtitles

+ 2 - 2
youtube_dl/extractor/trilulilu.py

@@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor):
             {
             {
                 'format': fnode.text,
                 'format': fnode.text,
                 'url': video_url_template % fnode.text,
                 'url': video_url_template % fnode.text,
+                'ext': fnode.text.partition('-')[0]
             }
             }
 
 
             for fnode in format_doc.findall('./formats/format')
             for fnode in format_doc.findall('./formats/format')
@@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor):
         }
         }
 
 
         # TODO: Remove when #980 has been merged
         # TODO: Remove when #980 has been merged
-        info['url'] = formats[-1]['url']
-        info['ext'] = formats[-1]['format'].partition('-')[0]
+        info.update(formats[-1])
 
 
         return info
         return info

+ 38 - 0
youtube_dl/extractor/vice.py

@@ -0,0 +1,38 @@
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import ExtractorError
+
+
class ViceIE(InfoExtractor):
    """Extractor for vice.com articles embedding an Ooyala player.

    Fix: the dots in the domain are now escaped in _VALID_URL — the previous
    bare '.' characters matched anything (e.g. "wwwXviceYcom").
    """
    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'

    _TEST = {
        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
        u'info_dict': {
            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
        },
        u'params': {
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        try:
            # Preferred source: the og:video meta tag.
            ooyala_url = self._og_search_video_url(webpage)
        except ExtractorError:
            try:
                # Fall back to the inline Ooyala player bootstrap call.
                embed_code = self._search_regex(
                    r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
                    u'ooyala embed code')
                ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
            except ExtractorError:
                raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
        return self.url_result(ooyala_url, ie='Ooyala')
+

+ 15 - 4
youtube_dl/extractor/xhamster.py

@@ -11,8 +11,8 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
 class XHamsterIE(InfoExtractor):
     """Information Extractor for xHamster"""
     """Information Extractor for xHamster"""
-    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
-    _TEST = {
+    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _TESTS = [{
         u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
         u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
         u'file': u'1509445.flv',
         u'file': u'1509445.flv',
         u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
         u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
@@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor):
             u"uploader_id": u"Ruseful2011", 
             u"uploader_id": u"Ruseful2011", 
             u"title": u"FemaleAgent Shy beauty takes the bait"
             u"title": u"FemaleAgent Shy beauty takes the bait"
         }
         }
-    }
+    },
+    {
+        u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+        u'file': u'2221348.flv',
+        u'md5': u'e767b9475de189320f691f49c679c4c7',
+        u'info_dict': {
+            u"upload_date": u"20130914", 
+            u"uploader_id": u"jojo747400", 
+            u"title": u"Britney Spears  Sexy Booty"
+        }
+    }]
 
 
     def _real_extract(self,url):
     def _real_extract(self,url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
 
 
         video_id = mobj.group('id')
         video_id = mobj.group('id')
-        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+        seo = mobj.group('seo')
+        mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
         webpage = self._download_webpage(mrss_url, video_id)
         webpage = self._download_webpage(mrss_url, video_id)
 
 
         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)

+ 65 - 67
youtube_dl/extractor/yahoo.py

@@ -1,4 +1,3 @@
-import datetime
 import itertools
 import itertools
 import json
 import json
 import re
 import re
@@ -6,86 +5,85 @@ import re
 from .common import InfoExtractor, SearchInfoExtractor
 from .common import InfoExtractor, SearchInfoExtractor
 from ..utils import (
 from ..utils import (
     compat_urllib_parse,
     compat_urllib_parse,
-
-    ExtractorError,
+    compat_urlparse,
+    determine_ext,
+    clean_html,
 )
 )
 
 
+
 class YahooIE(InfoExtractor):
 class YahooIE(InfoExtractor):
     IE_DESC = u'Yahoo screen'
     IE_DESC = u'Yahoo screen'
     _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
     _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
-    _TEST = {
-        u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
-        u'file': u'214727115.flv',
-        u'md5': u'2e717f169c1be93d84d3794a00d4a325',
-        u'info_dict': {
-            u"title": u"Julian Smith & Travis Legg Watch Julian Smith"
+    _TESTS = [
+        {
+            u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
+            u'file': u'214727115.mp4',
+            u'info_dict': {
+                u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
+                u'description': u'Julian and Travis watch Julian Smith',
+            },
         },
         },
-        u'skip': u'Requires rtmpdump'
-    }
+        {
+            u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
+            u'file': u'103000935.flv',
+            u'info_dict': {
+                u'title': u'The Cougar Lies with Spanish Moss',
+                u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
+            },
+            u'params': {
+                # Requires rtmpdump
+                u'skip_download': True,
+            },
+        },
+    ]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group('id')
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
-        m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage)
 
 
-        if m_id is None: 
-            # TODO: Check which url parameters are required
-            info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
-            webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage')
-            info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
-                        <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
-                        <media:pubStart><!\[CDATA\[(?P<date>.*?)\ .*\]\]></media:pubStart>.*
-                        <media:content\ medium="image"\ url="(?P<thumb>.*?)"\ name="LARGETHUMB"
-                        '''
-            self.report_extraction(video_id)
-            m_info = re.search(info_re, webpage, re.VERBOSE|re.DOTALL)
-            if m_info is None:
-                raise ExtractorError(u'Unable to extract video info')
-            video_title = m_info.group('title')
-            video_description = m_info.group('description')
-            video_thumb = m_info.group('thumb')
-            video_date = m_info.group('date')
-            video_date = datetime.datetime.strptime(video_date, '%m/%d/%Y').strftime('%Y%m%d')
-    
-            # TODO: Find a way to get mp4 videos
-            rest_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;element=stream;outputformat=mrss;id=%s;lmsoverride=1;bw=375;dynamicstream=1;cb=83521105;tech=flv,mp4;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
-            webpage = self._download_webpage(rest_url, video_id, u'Downloading video url webpage')
-            m_rest = re.search(r'<media:content url="(?P<url>.*?)" path="(?P<path>.*?)"', webpage)
-            video_url = m_rest.group('url')
-            video_path = m_rest.group('path')
-            if m_rest is None:
-                raise ExtractorError(u'Unable to extract video url')
+        items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
+            webpage, u'items', flags=re.MULTILINE)
+        items = json.loads(items_json)
+        info = items['mediaItems']['query']['results']['mediaObj'][0]
+        meta = info['meta']
+
+        formats = []
+        for s in info['streams']:
+            format_info = {
+                'width': s.get('width'),
+                'height': s.get('height'),
+                'bitrate': s.get('bitrate'),
+            }
+
+            host = s['host']
+            path = s['path']
+            if host.startswith('rtmp'):
+                format_info.update({
+                    'url': host,
+                    'play_path': path,
+                    'ext': 'flv',
+                })
+            else:
+                format_url = compat_urlparse.urljoin(host, path)
+                format_info['url'] = format_url
+                format_info['ext'] = determine_ext(format_url)
+                
+            formats.append(format_info)
+        formats = sorted(formats, key=lambda f:(f['height'], f['width']))
+
+        info = {
+            'id': video_id,
+            'title': meta['title'],
+            'formats': formats,
+            'description': clean_html(meta['description']),
+            'thumbnail': meta['thumbnail'],
+        }
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
 
 
-        else: # We have to use a different method if another id is defined
-            long_id = m_id.group('new_id')
-            info_url = 'http://video.query.yahoo.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.media.video.streams%20WHERE%20id%3D%22' + long_id + '%22%20AND%20format%3D%22mp4%2Cflv%22%20AND%20protocol%3D%22rtmp%2Chttp%22%20AND%20plrs%3D%2286Gj0vCaSzV_Iuf6hNylf2%22%20AND%20acctid%3D%22389%22%20AND%20plidl%3D%22%22%20AND%20pspid%3D%22792700001%22%20AND%20offnetwork%3D%22false%22%20AND%20site%3D%22ivy%22%20AND%20lang%3D%22en-US%22%20AND%20region%3D%22US%22%20AND%20override%3D%22none%22%3B&env=prod&format=json&callback=YUI.Env.JSONP.yui_3_8_1_1_1368368376830_335'
-            webpage = self._download_webpage(info_url, video_id, u'Downloading info json')
-            json_str = re.search(r'YUI.Env.JSONP.yui.*?\((.*?)\);', webpage).group(1)
-            info = json.loads(json_str)
-            res = info[u'query'][u'results'][u'mediaObj'][0]
-            stream = res[u'streams'][0]
-            video_path = stream[u'path']
-            video_url = stream[u'host']
-            meta = res[u'meta']
-            video_title = meta[u'title']
-            video_description = meta[u'description']
-            video_thumb = meta[u'thumbnail']
-            video_date = None # I can't find it
+        return info
 
 
-        info_dict = {
-                     'id': video_id,
-                     'url': video_url,
-                     'play_path': video_path,
-                     'title':video_title,
-                     'description': video_description,
-                     'thumbnail': video_thumb,
-                     'upload_date': video_date,
-                     'ext': 'flv',
-                     }
-        return info_dict
 
 
 class YahooSearchIE(SearchInfoExtractor):
 class YahooSearchIE(SearchInfoExtractor):
     IE_DESC = u'Yahoo screen search'
     IE_DESC = u'Yahoo screen search'

+ 7 - 0
youtube_dl/extractor/youku.py

@@ -66,6 +66,12 @@ class YoukuIE(InfoExtractor):
         self.report_extraction(video_id)
         self.report_extraction(video_id)
         try:
         try:
             config = json.loads(jsondata)
             config = json.loads(jsondata)
+            error_code = config['data'][0].get('error_code')
+            if error_code:
+                # -8 means blocked outside China.
+                error = config['data'][0].get('error')  # Chinese and English, separated by newline.
+                raise ExtractorError(error or u'Server reported error %i' % error_code,
+                    expected=True)
 
 
             video_title =  config['data'][0]['title']
             video_title =  config['data'][0]['title']
             seed = config['data'][0]['seed']
             seed = config['data'][0]['seed']
@@ -89,6 +95,7 @@ class YoukuIE(InfoExtractor):
 
 
             fileid = config['data'][0]['streamfileids'][format]
             fileid = config['data'][0]['streamfileids'][format]
             keys = [s['k'] for s in config['data'][0]['segs'][format]]
             keys = [s['k'] for s in config['data'][0]['segs'][format]]
+            # segs is usually a dictionary, but an empty *list* if an error occured.
         except (UnicodeDecodeError, ValueError, KeyError):
         except (UnicodeDecodeError, ValueError, KeyError):
             raise ExtractorError(u'Unable to extract info section')
             raise ExtractorError(u'Unable to extract info section')
 
 

+ 706 - 50
youtube_dl/extractor/youtube.py

@@ -1,15 +1,23 @@
 # coding: utf-8
 # coding: utf-8
 
 
+import collections
+import errno
+import io
+import itertools
 import json
 import json
-import netrc
+import os.path
 import re
 import re
 import socket
 import socket
-import itertools
+import string
+import struct
+import traceback
 import xml.etree.ElementTree
 import xml.etree.ElementTree
+import zlib
 
 
 from .common import InfoExtractor, SearchInfoExtractor
 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
 from ..utils import (
+    compat_chr,
     compat_http_client,
     compat_http_client,
     compat_parse_qs,
     compat_parse_qs,
     compat_urllib_error,
     compat_urllib_error,
@@ -23,6 +31,7 @@ from ..utils import (
     unescapeHTML,
     unescapeHTML,
     unified_strdate,
     unified_strdate,
     orderedSet,
     orderedSet,
+    write_json_file,
 )
 )
 
 
 class YoutubeBaseInfoExtractor(InfoExtractor):
 class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -139,7 +148,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                      (
                      (
                          (?:https?://)?                                       # http(s):// (optional)
                          (?:https?://)?                                       # http(s):// (optional)
                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
-                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
+                            tube\.majestyc\.net/|
+                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
                          (?:                                                  # the various things that can precede the ID:
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
@@ -351,7 +361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             u"info_dict": {
             u"info_dict": {
                 u"upload_date": u"20120506",
                 u"upload_date": u"20120506",
                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
+                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                 u"uploader": u"Icona Pop",
                 u"uploader": u"Icona Pop",
                 u"uploader_id": u"IconaPop"
                 u"uploader_id": u"IconaPop"
             }
             }
@@ -368,21 +378,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 u"uploader_id": u"justintimberlakeVEVO"
                 u"uploader_id": u"justintimberlakeVEVO"
             }
             }
         },
         },
-        {
-            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
-            u'file': u'TGi3HqYrWHE.mp4',
-            u'note': u'm3u8 video',
-            u'info_dict': {
-                u'title': u'Triathlon - Men - London 2012 Olympic Games',
-                u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
-                u'uploader': u'olympic',
-                u'upload_date': u'20120807',
-                u'uploader_id': u'olympic',
-            },
-            u'params': {
-                u'skip_download': True,
-            },
-        },
     ]
     ]
 
 
 
 
@@ -392,6 +387,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         if YoutubePlaylistIE.suitable(url): return False
         if YoutubePlaylistIE.suitable(url): return False
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
 
+    def __init__(self, *args, **kwargs):
+        super(YoutubeIE, self).__init__(*args, **kwargs)
+        self._player_cache = {}
+
     def report_video_webpage_download(self, video_id):
     def report_video_webpage_download(self, video_id):
         """Report attempt to download video webpage."""
         """Report attempt to download video webpage."""
         self.to_screen(u'%s: Downloading video webpage' % video_id)
         self.to_screen(u'%s: Downloading video webpage' % video_id)
@@ -412,11 +411,664 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         """Indicate the download will use the RTMP protocol."""
         """Indicate the download will use the RTMP protocol."""
         self.to_screen(u'RTMP download detected')
         self.to_screen(u'RTMP download detected')
 
 
-    def _decrypt_signature(self, s):
+    def _extract_signature_function(self, video_id, player_url, slen):
+        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
+                        player_url)
+        player_type = id_m.group('ext')
+        player_id = id_m.group('id')
+
+        # Read from filesystem cache
+        func_id = '%s_%s_%d' % (player_type, player_id, slen)
+        assert os.path.basename(func_id) == func_id
+        cache_dir = self._downloader.params.get('cachedir',
+                                                u'~/.youtube-dl/cache')
+
+        cache_enabled = cache_dir is not None
+        if cache_enabled:
+            cache_fn = os.path.join(os.path.expanduser(cache_dir),
+                                    u'youtube-sigfuncs',
+                                    func_id + '.json')
+            try:
+                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+                    cache_spec = json.load(cachef)
+                return lambda s: u''.join(s[i] for i in cache_spec)
+            except IOError:
+                pass  # No cache available
+
+        if player_type == 'js':
+            code = self._download_webpage(
+                player_url, video_id,
+                note=u'Downloading %s player %s' % (player_type, player_id),
+                errnote=u'Download of %s failed' % player_url)
+            res = self._parse_sig_js(code)
+        elif player_type == 'swf':
+            urlh = self._request_webpage(
+                player_url, video_id,
+                note=u'Downloading %s player %s' % (player_type, player_id),
+                errnote=u'Download of %s failed' % player_url)
+            code = urlh.read()
+            res = self._parse_sig_swf(code)
+        else:
+            assert False, 'Invalid player type %r' % player_type
+
+        if cache_enabled:
+            try:
+                test_string = u''.join(map(compat_chr, range(slen)))
+                cache_res = res(test_string)
+                cache_spec = [ord(c) for c in cache_res]
+                try:
+                    os.makedirs(os.path.dirname(cache_fn))
+                except OSError as ose:
+                    if ose.errno != errno.EEXIST:
+                        raise
+                write_json_file(cache_spec, cache_fn)
+            except Exception:
+                tb = traceback.format_exc()
+                self._downloader.report_warning(
+                    u'Writing cache to %r failed: %s' % (cache_fn, tb))
+
+        return res
+
+    def _print_sig_code(self, func, slen):
+        def gen_sig_code(idxs):
+            def _genslice(start, end, step):
+                starts = u'' if start == 0 else str(start)
+                ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
+                steps = u'' if step == 1 else (u':%d' % step)
+                return u's[%s%s%s]' % (starts, ends, steps)
+
+            step = None
+            start = '(Never used)'  # Quelch pyflakes warnings - start will be
+                                    # set as soon as step is set
+            for i, prev in zip(idxs[1:], idxs[:-1]):
+                if step is not None:
+                    if i - prev == step:
+                        continue
+                    yield _genslice(start, prev, step)
+                    step = None
+                    continue
+                if i - prev in [-1, 1]:
+                    step = i - prev
+                    start = prev
+                    continue
+                else:
+                    yield u's[%d]' % prev
+            if step is None:
+                yield u's[%d]' % i
+            else:
+                yield _genslice(start, i, step)
+
+        test_string = u''.join(map(compat_chr, range(slen)))
+        cache_res = func(test_string)
+        cache_spec = [ord(c) for c in cache_res]
+        expr_code = u' + '.join(gen_sig_code(cache_spec))
+        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
+        self.to_screen(u'Extracted signature function:\n' + code)
+
+    def _parse_sig_js(self, jscode):
+        funcname = self._search_regex(
+            r'signature=([a-zA-Z]+)', jscode,
+            u'Initial JS player signature function name')
+
+        functions = {}
+
+        def argidx(varname):
+            return string.lowercase.index(varname)
+
+        def interpret_statement(stmt, local_vars, allow_recursion=20):
+            if allow_recursion < 0:
+                raise ExtractorError(u'Recursion limit reached')
+
+            if stmt.startswith(u'var '):
+                stmt = stmt[len(u'var '):]
+            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
+                             r'=(?P<expr>.*)$', stmt)
+            if ass_m:
+                if ass_m.groupdict().get('index'):
+                    def assign(val):
+                        lvar = local_vars[ass_m.group('out')]
+                        idx = interpret_expression(ass_m.group('index'),
+                                                   local_vars, allow_recursion)
+                        assert isinstance(idx, int)
+                        lvar[idx] = val
+                        return val
+                    expr = ass_m.group('expr')
+                else:
+                    def assign(val):
+                        local_vars[ass_m.group('out')] = val
+                        return val
+                    expr = ass_m.group('expr')
+            elif stmt.startswith(u'return '):
+                assign = lambda v: v
+                expr = stmt[len(u'return '):]
+            else:
+                raise ExtractorError(
+                    u'Cannot determine left side of statement in %r' % stmt)
+
+            v = interpret_expression(expr, local_vars, allow_recursion)
+            return assign(v)
+
+        def interpret_expression(expr, local_vars, allow_recursion):
+            if expr.isdigit():
+                return int(expr)
+
+            if expr.isalpha():
+                return local_vars[expr]
+
+            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
+            if m:
+                member = m.group('member')
+                val = local_vars[m.group('in')]
+                if member == 'split("")':
+                    return list(val)
+                if member == 'join("")':
+                    return u''.join(val)
+                if member == 'length':
+                    return len(val)
+                if member == 'reverse()':
+                    return val[::-1]
+                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
+                if slice_m:
+                    idx = interpret_expression(
+                        slice_m.group('idx'), local_vars, allow_recursion-1)
+                    return val[idx:]
+
+            m = re.match(
+                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+            if m:
+                val = local_vars[m.group('in')]
+                idx = interpret_expression(m.group('idx'), local_vars,
+                                           allow_recursion-1)
+                return val[idx]
+
+            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
+            if m:
+                a = interpret_expression(m.group('a'),
+                                         local_vars, allow_recursion)
+                b = interpret_expression(m.group('b'),
+                                         local_vars, allow_recursion)
+                return a % b
+
+            m = re.match(
+                r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+            if m:
+                fname = m.group('func')
+                if fname not in functions:
+                    functions[fname] = extract_function(fname)
+                argvals = [int(v) if v.isdigit() else local_vars[v]
+                           for v in m.group('args').split(',')]
+                return functions[fname](argvals)
+            raise ExtractorError(u'Unsupported JS expression %r' % expr)
+
+        def extract_function(funcname):
+            func_m = re.search(
+                r'function ' + re.escape(funcname) +
+                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+                jscode)
+            argnames = func_m.group('args').split(',')
+
+            def resf(args):
+                local_vars = dict(zip(argnames, args))
+                for stmt in func_m.group('code').split(';'):
+                    res = interpret_statement(stmt, local_vars)
+                return res
+            return resf
+
+        initial_function = extract_function(funcname)
+        return lambda s: initial_function([s])
+
+    def _parse_sig_swf(self, file_contents):
+        if file_contents[1:3] != b'WS':
+            raise ExtractorError(
+                u'Not an SWF file; header is %r' % file_contents[:3])
+        if file_contents[:1] == b'C':
+            content = zlib.decompress(file_contents[8:])
+        else:
+            raise NotImplementedError(u'Unsupported compression format %r' %
+                                      file_contents[:1])
+
+        def extract_tags(content):
+            pos = 0
+            while pos < len(content):
+                header16 = struct.unpack('<H', content[pos:pos+2])[0]
+                pos += 2
+                tag_code = header16 >> 6
+                tag_len = header16 & 0x3f
+                if tag_len == 0x3f:
+                    tag_len = struct.unpack('<I', content[pos:pos+4])[0]
+                    pos += 4
+                assert pos+tag_len <= len(content)
+                yield (tag_code, content[pos:pos+tag_len])
+                pos += tag_len
+
+        code_tag = next(tag
+                        for tag_code, tag in extract_tags(content)
+                        if tag_code == 82)
+        p = code_tag.index(b'\0', 4) + 1
+        code_reader = io.BytesIO(code_tag[p:])
+
+        # Parse ABC (AVM2 ByteCode)
+        def read_int(reader=None):
+            if reader is None:
+                reader = code_reader
+            res = 0
+            shift = 0
+            for _ in range(5):
+                buf = reader.read(1)
+                assert len(buf) == 1
+                b = struct.unpack('<B', buf)[0]
+                res = res | ((b & 0x7f) << shift)
+                if b & 0x80 == 0:
+                    break
+                shift += 7
+            return res
+
+        def u30(reader=None):
+            res = read_int(reader)
+            assert res & 0xf0000000 == 0
+            return res
+        u32 = read_int
+
+        def s32(reader=None):
+            v = read_int(reader)
+            if v & 0x80000000 != 0:
+                v = - ((v ^ 0xffffffff) + 1)
+            return v
+
+        def read_string(reader=None):
+            if reader is None:
+                reader = code_reader
+            slen = u30(reader)
+            resb = reader.read(slen)
+            assert len(resb) == slen
+            return resb.decode('utf-8')
+
+        def read_bytes(count, reader=None):
+            if reader is None:
+                reader = code_reader
+            resb = reader.read(count)
+            assert len(resb) == count
+            return resb
+
+        def read_byte(reader=None):
+            resb = read_bytes(1, reader=reader)
+            res = struct.unpack('<B', resb)[0]
+            return res
+
+        # minor_version + major_version
+        read_bytes(2 + 2)
+
+        # Constant pool
+        int_count = u30()
+        for _c in range(1, int_count):
+            s32()
+        uint_count = u30()
+        for _c in range(1, uint_count):
+            u32()
+        double_count = u30()
+        read_bytes((double_count-1) * 8)
+        string_count = u30()
+        constant_strings = [u'']
+        for _c in range(1, string_count):
+            s = read_string()
+            constant_strings.append(s)
+        namespace_count = u30()
+        for _c in range(1, namespace_count):
+            read_bytes(1)  # kind
+            u30()  # name
+        ns_set_count = u30()
+        for _c in range(1, ns_set_count):
+            count = u30()
+            for _c2 in range(count):
+                u30()
+        multiname_count = u30()
+        MULTINAME_SIZES = {
+            0x07: 2,  # QName
+            0x0d: 2,  # QNameA
+            0x0f: 1,  # RTQName
+            0x10: 1,  # RTQNameA
+            0x11: 0,  # RTQNameL
+            0x12: 0,  # RTQNameLA
+            0x09: 2,  # Multiname
+            0x0e: 2,  # MultinameA
+            0x1b: 1,  # MultinameL
+            0x1c: 1,  # MultinameLA
+        }
+        multinames = [u'']
+        for _c in range(1, multiname_count):
+            kind = u30()
+            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
+            if kind == 0x07:
+                u30()  # namespace_idx
+                name_idx = u30()
+                multinames.append(constant_strings[name_idx])
+            else:
+                multinames.append('[MULTINAME kind: %d]' % kind)
+                for _c2 in range(MULTINAME_SIZES[kind]):
+                    u30()
+
+        # Methods
+        method_count = u30()
+        MethodInfo = collections.namedtuple(
+            'MethodInfo',
+            ['NEED_ARGUMENTS', 'NEED_REST'])
+        method_infos = []
+        for method_id in range(method_count):
+            param_count = u30()
+            u30()  # return type
+            for _ in range(param_count):
+                u30()  # param type
+            u30()  # name index (always 0 for youtube)
+            flags = read_byte()
+            if flags & 0x08 != 0:
+                # Options present
+                option_count = u30()
+                for c in range(option_count):
+                    u30()  # val
+                    read_bytes(1)  # kind
+            if flags & 0x80 != 0:
+                # Param names present
+                for _ in range(param_count):
+                    u30()  # param name
+            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
+            method_infos.append(mi)
+
+        # Metadata
+        metadata_count = u30()
+        for _c in range(metadata_count):
+            u30()  # name
+            item_count = u30()
+            for _c2 in range(item_count):
+                u30()  # key
+                u30()  # value
+
+        def parse_traits_info():
+            trait_name_idx = u30()
+            kind_full = read_byte()
+            kind = kind_full & 0x0f
+            attrs = kind_full >> 4
+            methods = {}
+            if kind in [0x00, 0x06]:  # Slot or Const
+                u30()  # Slot id
+                u30()  # type_name_idx
+                vindex = u30()
+                if vindex != 0:
+                    read_byte()  # vkind
+            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
+                u30()  # disp_id
+                method_idx = u30()
+                methods[multinames[trait_name_idx]] = method_idx
+            elif kind == 0x04:  # Class
+                u30()  # slot_id
+                u30()  # classi
+            elif kind == 0x05:  # Function
+                u30()  # slot_id
+                function_idx = u30()
+                methods[function_idx] = multinames[trait_name_idx]
+            else:
+                raise ExtractorError(u'Unsupported trait kind %d' % kind)
+
+            if attrs & 0x4 != 0:  # Metadata present
+                metadata_count = u30()
+                for _c3 in range(metadata_count):
+                    u30()  # metadata index
+
+            return methods
+
+        # Classes
+        TARGET_CLASSNAME = u'SignatureDecipher'
+        searched_idx = multinames.index(TARGET_CLASSNAME)
+        searched_class_id = None
+        class_count = u30()
+        for class_id in range(class_count):
+            name_idx = u30()
+            if name_idx == searched_idx:
+                # We found the class we're looking for!
+                searched_class_id = class_id
+            u30()  # super_name idx
+            flags = read_byte()
+            if flags & 0x08 != 0:  # Protected namespace is present
+                u30()  # protected_ns_idx
+            intrf_count = u30()
+            for _c2 in range(intrf_count):
+                u30()
+            u30()  # iinit
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                parse_traits_info()
+
+        if searched_class_id is None:
+            raise ExtractorError(u'Target class %r not found' %
+                                 TARGET_CLASSNAME)
+
+        method_names = {}
+        method_idxs = {}
+        for class_id in range(class_count):
+            u30()  # cinit
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                trait_methods = parse_traits_info()
+                if class_id == searched_class_id:
+                    method_names.update(trait_methods.items())
+                    method_idxs.update(dict(
+                        (idx, name)
+                        for name, idx in trait_methods.items()))
+
+        # Scripts
+        script_count = u30()
+        for _c in range(script_count):
+            u30()  # init
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                parse_traits_info()
+
+        # Method bodies
+        method_body_count = u30()
+        Method = collections.namedtuple('Method', ['code', 'local_count'])
+        methods = {}
+        for _c in range(method_body_count):
+            method_idx = u30()
+            u30()  # max_stack
+            local_count = u30()
+            u30()  # init_scope_depth
+            u30()  # max_scope_depth
+            code_length = u30()
+            code = read_bytes(code_length)
+            if method_idx in method_idxs:
+                m = Method(code, local_count)
+                methods[method_idxs[method_idx]] = m
+            exception_count = u30()
+            for _c2 in range(exception_count):
+                u30()  # from
+                u30()  # to
+                u30()  # target
+                u30()  # exc_type
+                u30()  # var_name
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                parse_traits_info()
+
+        assert p + code_reader.tell() == len(code_tag)
+        assert len(methods) == len(method_idxs)
+
+        method_pyfunctions = {}
+
+        def extract_function(func_name):
+            if func_name in method_pyfunctions:
+                return method_pyfunctions[func_name]
+            if func_name not in methods:
+                raise ExtractorError(u'Cannot find function %r' % func_name)
+            m = methods[func_name]
+
+            def resfunc(args):
+                registers = ['(this)'] + list(args) + [None] * m.local_count
+                stack = []
+                coder = io.BytesIO(m.code)
+                while True:
+                    opcode = struct.unpack('!B', coder.read(1))[0]
+                    if opcode == 36:  # pushbyte
+                        v = struct.unpack('!B', coder.read(1))[0]
+                        stack.append(v)
+                    elif opcode == 44:  # pushstring
+                        idx = u30(coder)
+                        stack.append(constant_strings[idx])
+                    elif opcode == 48:  # pushscope
+                        # We don't implement the scope register, so we'll just
+                        # ignore the popped value
+                        stack.pop()
+                    elif opcode == 70:  # callproperty
+                        index = u30(coder)
+                        mname = multinames[index]
+                        arg_count = u30(coder)
+                        args = list(reversed(
+                            [stack.pop() for _ in range(arg_count)]))
+                        obj = stack.pop()
+                        if mname == u'split':
+                            assert len(args) == 1
+                            assert isinstance(args[0], compat_str)
+                            assert isinstance(obj, compat_str)
+                            if args[0] == u'':
+                                res = list(obj)
+                            else:
+                                res = obj.split(args[0])
+                            stack.append(res)
+                        elif mname == u'slice':
+                            assert len(args) == 1
+                            assert isinstance(args[0], int)
+                            assert isinstance(obj, list)
+                            res = obj[args[0]:]
+                            stack.append(res)
+                        elif mname == u'join':
+                            assert len(args) == 1
+                            assert isinstance(args[0], compat_str)
+                            assert isinstance(obj, list)
+                            res = args[0].join(obj)
+                            stack.append(res)
+                        elif mname in method_pyfunctions:
+                            stack.append(method_pyfunctions[mname](args))
+                        else:
+                            raise NotImplementedError(
+                                u'Unsupported property %r on %r'
+                                % (mname, obj))
+                    elif opcode == 72:  # returnvalue
+                        res = stack.pop()
+                        return res
+                    elif opcode == 79:  # callpropvoid
+                        index = u30(coder)
+                        mname = multinames[index]
+                        arg_count = u30(coder)
+                        args = list(reversed(
+                            [stack.pop() for _ in range(arg_count)]))
+                        obj = stack.pop()
+                        if mname == u'reverse':
+                            assert isinstance(obj, list)
+                            obj.reverse()
+                        else:
+                            raise NotImplementedError(
+                                u'Unsupported (void) property %r on %r'
+                                % (mname, obj))
+                    elif opcode == 93:  # findpropstrict
+                        index = u30(coder)
+                        mname = multinames[index]
+                        res = extract_function(mname)
+                        stack.append(res)
+                    elif opcode == 97:  # setproperty
+                        index = u30(coder)
+                        value = stack.pop()
+                        idx = stack.pop()
+                        obj = stack.pop()
+                        assert isinstance(obj, list)
+                        assert isinstance(idx, int)
+                        obj[idx] = value
+                    elif opcode == 98:  # getlocal
+                        index = u30(coder)
+                        stack.append(registers[index])
+                    elif opcode == 99:  # setlocal
+                        index = u30(coder)
+                        value = stack.pop()
+                        registers[index] = value
+                    elif opcode == 102:  # getproperty
+                        index = u30(coder)
+                        pname = multinames[index]
+                        if pname == u'length':
+                            obj = stack.pop()
+                            assert isinstance(obj, list)
+                            stack.append(len(obj))
+                        else:  # Assume attribute access
+                            idx = stack.pop()
+                            assert isinstance(idx, int)
+                            obj = stack.pop()
+                            assert isinstance(obj, list)
+                            stack.append(obj[idx])
+                    elif opcode == 128:  # coerce
+                        u30(coder)
+                    elif opcode == 133:  # coerce_s
+                        assert isinstance(stack[-1], (type(None), compat_str))
+                    elif opcode == 164:  # modulo
+                        value2 = stack.pop()
+                        value1 = stack.pop()
+                        res = value1 % value2
+                        stack.append(res)
+                    elif opcode == 208:  # getlocal_0
+                        stack.append(registers[0])
+                    elif opcode == 209:  # getlocal_1
+                        stack.append(registers[1])
+                    elif opcode == 210:  # getlocal_2
+                        stack.append(registers[2])
+                    elif opcode == 211:  # getlocal_3
+                        stack.append(registers[3])
+                    elif opcode == 214:  # setlocal_2
+                        registers[2] = stack.pop()
+                    elif opcode == 215:  # setlocal_3
+                        registers[3] = stack.pop()
+                    else:
+                        raise NotImplementedError(
+                            u'Unsupported opcode %d' % opcode)
+
+            method_pyfunctions[func_name] = resfunc
+            return resfunc
+
+        initial_function = extract_function(u'decipher')
+        return lambda s: initial_function([s])
+
+    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
         """Turn the encrypted s field into a working signature"""
         """Turn the encrypted s field into a working signature"""
 
 
-        if len(s) == 92:
+        if player_url is not None:
+            try:
+                if player_url not in self._player_cache:
+                    func = self._extract_signature_function(
+                        video_id, player_url, len(s)
+                    )
+                    self._player_cache[player_url] = func
+                func = self._player_cache[player_url]
+                if self._downloader.params.get('youtube_print_sig_code'):
+                    self._print_sig_code(func, len(s))
+                return func(s)
+            except Exception:
+                tb = traceback.format_exc()
+                self._downloader.report_warning(
+                    u'Automatic signature extraction failed: ' + tb)
+
+            self._downloader.report_warning(
+                u'Warning: Falling back to static signature algorithm')
+
+        return self._static_decrypt_signature(
+            s, video_id, player_url, age_gate)
+
+    def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
+        if age_gate:
+            # The videos with age protection use another player, so the
+            # algorithms can be different.
+            if len(s) == 86:
+                return s[2:63] + s[82] + s[64:82] + s[63]
+
+        if len(s) == 93:
+            return s[86:29:-1] + s[88] + s[28:5:-1]
+        elif len(s) == 92:
             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
+        elif len(s) == 91:
+            return s[84:27:-1] + s[86] + s[26:5:-1]
         elif len(s) == 90:
         elif len(s) == 90:
             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
         elif len(s) == 89:
         elif len(s) == 89:
@@ -426,13 +1078,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         elif len(s) == 87:
         elif len(s) == 87:
             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
         elif len(s) == 86:
         elif len(s) == 86:
-            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
+            return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
         elif len(s) == 85:
         elif len(s) == 85:
-            return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
+            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
         elif len(s) == 84:
         elif len(s) == 84:
-            return s[81:36:-1] + s[0] + s[35:2:-1]
+            return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
         elif len(s) == 83:
         elif len(s) == 83:
-            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
+            return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
         elif len(s) == 82:
         elif len(s) == 82:
             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
         elif len(s) == 81:
         elif len(s) == 81:
@@ -445,15 +1097,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         else:
         else:
             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
 
 
-    def _decrypt_signature_age_gate(self, s):
-        # The videos with age protection use another player, so the algorithms
-        # can be different.
-        if len(s) == 86:
-            return s[2:63] + s[82] + s[64:82] + s[63]
-        else:
-            # Fallback to the other algortihms
-            return self._decrypt_signature(s)
-
     def _get_available_subtitles(self, video_id):
     def _get_available_subtitles(self, video_id):
         try:
         try:
             sub_list = self._download_webpage(
             sub_list = self._download_webpage(
@@ -626,7 +1269,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
 
 
         # Attempt to extract SWF player URL
         # Attempt to extract SWF player URL
-        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
+        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
         if mobj is not None:
         if mobj is not None:
             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
         else:
         else:
@@ -702,7 +1345,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             video_thumbnail = m_thumb.group(1)
             video_thumbnail = m_thumb.group(1)
         elif 'thumbnail_url' not in video_info:
         elif 'thumbnail_url' not in video_info:
             self._downloader.report_warning(u'unable to extract video thumbnail')
             self._downloader.report_warning(u'unable to extract video thumbnail')
-            video_thumbnail = ''
+            video_thumbnail = None
         else:   # don't panic if we can't find it
         else:   # don't panic if we can't find it
             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
 
 
@@ -779,24 +1422,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     if 'sig' in url_data:
                     if 'sig' in url_data:
                         url += '&signature=' + url_data['sig'][0]
                         url += '&signature=' + url_data['sig'][0]
                     elif 's' in url_data:
                     elif 's' in url_data:
+                        encrypted_sig = url_data['s'][0]
                         if self._downloader.params.get('verbose'):
                         if self._downloader.params.get('verbose'):
-                            s = url_data['s'][0]
                             if age_gate:
                             if age_gate:
-                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
-                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
-                                    'flash player', fatal=False)
-                                player = 'flash player %s' % player_version
+                                if player_url is None:
+                                    player_version = 'unknown'
+                                else:
+                                    player_version = self._search_regex(
+                                        r'-(.+)\.swf$', player_url,
+                                        u'flash player', fatal=False)
+                                player_desc = 'flash player %s' % player_version
                             else:
                             else:
-                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
+                                player_version = self._search_regex(
+                                    r'html5player-(.+?)\.js', video_webpage,
                                     'html5 player', fatal=False)
                                     'html5 player', fatal=False)
-                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
+                                player_desc = u'html5 player %s' % player_version
+
+                            parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
-                                (len(s), parts_sizes, url_data['itag'][0], player))
-                        encrypted_sig = url_data['s'][0]
-                        if age_gate:
-                            signature = self._decrypt_signature_age_gate(encrypted_sig)
-                        else:
-                            signature = self._decrypt_signature(encrypted_sig)
+                                (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
+
+                        if not age_gate:
+                            jsplayer_url_json = self._search_regex(
+                                r'"assets":.+?"js":\s*("[^"]+")',
+                                video_webpage, u'JS player URL')
+                            player_url = json.loads(jsplayer_url_json)
+
+                        signature = self._decrypt_signature(
+                            encrypted_sig, video_id, player_url, age_gate)
                         url += '&signature=' + signature
                         url += '&signature=' + signature
                     if 'ratebypass' not in url:
                     if 'ratebypass' not in url:
                         url += '&ratebypass=yes'
                         url += '&ratebypass=yes'
@@ -812,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 return
                 return
 
 
         else:
         else:
-            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
+            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 
 
         results = []
         results = []
         for format_param, video_real_url in video_url_list:
         for format_param, video_real_url in video_url_list:
@@ -1007,6 +1660,9 @@ class YoutubeUserIE(InfoExtractor):
                 response = json.loads(page)
                 response = json.loads(page)
             except ValueError as err:
             except ValueError as err:
                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+            if 'entry' not in response['feed']:
+                # Number of videos is a multiple of self._MAX_RESULTS
+                break
 
 
             # Extract video identifiers
             # Extract video identifiers
             ids_in_page = []
             ids_in_page = []

+ 42 - 32
youtube_dl/extractor/zdf.py

@@ -2,16 +2,14 @@ import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
+    determine_ext,
     ExtractorError,
     ExtractorError,
-    unescapeHTML,
 )
 )
 
 
+
 class ZDFIE(InfoExtractor):
 class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
-    _TITLE = r'<h1(?: class="beitragHeadline")?>(?P<title>.*)</h1>'
+    _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
     _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
     _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
-    _MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
-    _RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
@@ -19,6 +17,9 @@ class ZDFIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group('video_id')
         video_id = mobj.group('video_id')
 
 
+        if mobj.group('hash'):
+            url = url.replace(u'#', u'', 1)
+
         html = self._download_webpage(url, video_id)
         html = self._download_webpage(url, video_id)
         streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
         streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
         if streams is None:
         if streams is None:
@@ -27,39 +28,48 @@ class ZDFIE(InfoExtractor):
         # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
         # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
         # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
         # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
         # choose first/default media type and highest quality for now
         # choose first/default media type and highest quality for now
-        for s in streams:        #find 300 - dsl1000mbit
-            if s['quality'] == '300' and s['media_type'] == 'wstreaming':
-                stream_=s
-                break
-        for s in streams:        #find veryhigh - dsl2000mbit
-            if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working
-                stream_=s
-                break
-        if stream_ is None:
+        def stream_pref(s):
+            TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
+            try:
+                type_pref = TYPE_ORDER.index(s['media_type'])
+            except ValueError:
+                type_pref = 999
+
+            QUALITY_ORDER = ['veryhigh', '300']
+            try:
+                quality_pref = QUALITY_ORDER.index(s['quality'])
+            except ValueError:
+                quality_pref = 999
+
+            return (type_pref, quality_pref)
+
+        sorted_streams = sorted(streams, key=stream_pref)
+        if not sorted_streams:
             raise ExtractorError(u'No stream found.')
             raise ExtractorError(u'No stream found.')
+        stream = sorted_streams[0]
 
 
-        media_link = self._download_webpage(stream_['video_url'], video_id,'Get stream URL')
+        media_link = self._download_webpage(
+            stream['video_url'],
+            video_id,
+            u'Get stream URL')
 
 
-        self.report_extraction(video_id)
-        mobj = re.search(self._TITLE, html)
-        if mobj is None:
-            raise ExtractorError(u'Cannot extract title')
-        title = unescapeHTML(mobj.group('title'))
+        MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
+        RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
 
 
-        mobj = re.search(self._MMS_STREAM, media_link)
+        mobj = re.search(self._MEDIA_STREAM, media_link)
         if mobj is None:
         if mobj is None:
-            mobj = re.search(self._RTSP_STREAM, media_link)
+            mobj = re.search(RTSP_STREAM, media_link)
             if mobj is None:
             if mobj is None:
                 raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
                 raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
-        mms_url = mobj.group('video_url')
+        video_url = mobj.group('video_url')
 
 
-        mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url)
-        if mobj is None:
-            raise ExtractorError(u'Cannot extract extention')
-        ext = mobj.group('ext')
+        title = self._html_search_regex(
+            r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
+            html, u'title')
 
 
-        return [{'id': video_id,
-                 'url': mms_url,
-                 'title': title,
-                 'ext': ext
-                 }]
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'ext': determine_ext(video_url)
+        }

+ 23 - 12
youtube_dl/update.py

@@ -1,6 +1,9 @@
+import io
 import json
 import json
 import traceback
 import traceback
 import hashlib
 import hashlib
+import subprocess
+import sys
 from zipimport import zipimporter
 from zipimport import zipimporter
 
 
 from .utils import *
 from .utils import *
@@ -34,7 +37,7 @@ def rsa_verify(message, signature, key):
     if signature != sha256(message).digest(): return False
     if signature != sha256(message).digest(): return False
     return True
     return True
 
 
-def update_self(to_screen, verbose, filename):
+def update_self(to_screen, verbose):
     """Update the program file with the latest version from the repository"""
     """Update the program file with the latest version from the repository"""
 
 
     UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
     UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
@@ -42,7 +45,6 @@ def update_self(to_screen, verbose, filename):
     JSON_URL = UPDATE_URL + 'versions.json'
     JSON_URL = UPDATE_URL + 'versions.json'
     UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
     UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
 
 
-
     if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
     if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
         to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
         to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
         return
         return
@@ -75,11 +77,18 @@ def update_self(to_screen, verbose, filename):
         to_screen(u'ERROR: the versions file signature is invalid. Aborting.')
         to_screen(u'ERROR: the versions file signature is invalid. Aborting.')
         return
         return
 
 
-    to_screen(u'Updating to version ' + versions_info['latest'] + '...')
-    version = versions_info['versions'][versions_info['latest']]
+    version_id = versions_info['latest']
+    to_screen(u'Updating to version ' + version_id + '...')
+    version = versions_info['versions'][version_id]
 
 
     print_notes(to_screen, versions_info['versions'])
     print_notes(to_screen, versions_info['versions'])
 
 
+    filename = sys.argv[0]
+    # Py2EXE: Filename could be different
+    if hasattr(sys, "frozen") and not os.path.isfile(filename):
+        if os.path.isfile(filename + u'.exe'):
+            filename += u'.exe'
+
     if not os.access(filename, os.W_OK):
     if not os.access(filename, os.W_OK):
         to_screen(u'ERROR: no write permissions on %s' % filename)
         to_screen(u'ERROR: no write permissions on %s' % filename)
         return
         return
@@ -116,16 +125,18 @@ def update_self(to_screen, verbose, filename):
 
 
         try:
         try:
             bat = os.path.join(directory, 'youtube-dl-updater.bat')
             bat = os.path.join(directory, 'youtube-dl-updater.bat')
-            b = open(bat, 'w')
-            b.write("""
-echo Updating youtube-dl...
+            with io.open(bat, 'w') as batfile:
+                batfile.write(u"""
+@echo off
+echo Waiting for file handle to be closed ...
 ping 127.0.0.1 -n 5 -w 1000 > NUL
 ping 127.0.0.1 -n 5 -w 1000 > NUL
-move /Y "%s.new" "%s"
-del "%s"
-            \n""" %(exe, exe, bat))
-            b.close()
+move /Y "%s.new" "%s" > NUL
+echo Updated youtube-dl to version %s.
+start /b "" cmd /c del "%%~f0"&exit /b"
+                \n""" % (exe, exe, version_id))
 
 
-            os.startfile(bat)
+            subprocess.Popen([bat])  # Continues to run in the background
+            return  # Do not show premature success messages
         except (IOError, OSError) as err:
         except (IOError, OSError) as err:
             if verbose: to_screen(compat_str(traceback.format_exc()))
             if verbose: to_screen(compat_str(traceback.format_exc()))
             to_screen(u'ERROR: unable to overwrite current version')
             to_screen(u'ERROR: unable to overwrite current version')

+ 28 - 1
youtube_dl/utils.py

@@ -66,6 +66,12 @@ try:
 except ImportError:  # Python 2
 except ImportError:  # Python 2
     from urllib2 import HTTPError as compat_HTTPError
     from urllib2 import HTTPError as compat_HTTPError
 
 
+try:
+    from urllib.request import urlretrieve as compat_urlretrieve
+except ImportError:  # Python 2
+    from urllib import urlretrieve as compat_urlretrieve
+
+
 try:
 try:
     from subprocess import DEVNULL
     from subprocess import DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@ -700,7 +706,16 @@ def unified_strdate(date_str):
     date_str = date_str.replace(',',' ')
     date_str = date_str.replace(',',' ')
     # %z (UTC offset) is only supported in python>=3.2
     # %z (UTC offset) is only supported in python>=3.2
     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
-    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
+    format_expressions = [
+        '%d %B %Y',
+        '%B %d %Y',
+        '%b %d %Y',
+        '%Y-%m-%d',
+        '%d/%m/%Y',
+        '%Y/%m/%d %H:%M:%S',
+        '%d.%m.%Y %H:%M',
+        '%Y-%m-%dT%H:%M:%SZ',
+    ]
     for expression in format_expressions:
     for expression in format_expressions:
         try:
         try:
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -781,6 +796,18 @@ def platform_name():
     return res
     return res
 
 
 
 
+def write_string(s, out=None):
+    if out is None:
+        out = sys.stderr
+    assert type(s) == type(u'')
+
+    if ('b' in getattr(out, 'mode', '') or
+            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
+        s = s.encode(preferredencoding(), 'ignore')
+    out.write(s)
+    out.flush()
+
+
 def bytes_to_intlist(bs):
 def bytes_to_intlist(bs):
     if not bs:
     if not bs:
         return []
         return []

+ 1 - 1
youtube_dl/version.py

@@ -1,2 +1,2 @@
 
 
-__version__ = '2013.09.12'
+__version__ = '2013.09.29'