Przeglądaj źródła

Merge branch 'master' into subtitles_rework

Ismael Mejia 12 lat temu
rodzic
commit
72836fcee4
49 zmienionych plików z 1682 dodań i 159 usunięć
  1. 12 7
      README.md
  2. 5 1
      devscripts/bash-completion.in
  3. 18 11
      devscripts/gh-pages/add-version.py
  4. 1 2
      devscripts/gh-pages/update-feed.py
  5. 33 0
      devscripts/gh-pages/update-sites.py
  6. 2 1
      devscripts/release.sh
  7. 6 6
      devscripts/youtube_genalgo.py
  8. 42 20
      test/test_all_urls.py
  9. 3 4
      test/test_download.py
  10. 17 11
      youtube_dl/FileDownloader.py
  11. 3 2
      youtube_dl/PostProcessor.py
  12. 18 5
      youtube_dl/YoutubeDL.py
  13. 21 5
      youtube_dl/__init__.py
  14. 202 0
      youtube_dl/aes.py
  15. 20 2
      youtube_dl/extractor/__init__.py
  16. 75 0
      youtube_dl/extractor/addanime.py
  17. 166 0
      youtube_dl/extractor/appletrailers.py
  18. 2 2
      youtube_dl/extractor/c56.py
  19. 35 0
      youtube_dl/extractor/canalc2.py
  20. 1 1
      youtube_dl/extractor/canalplus.py
  21. 58 0
      youtube_dl/extractor/cnn.py
  22. 13 3
      youtube_dl/extractor/common.py
  23. 5 3
      youtube_dl/extractor/dailymotion.py
  24. 74 0
      youtube_dl/extractor/daum.py
  25. 39 0
      youtube_dl/extractor/defense.py
  26. 13 5
      youtube_dl/extractor/generic.py
  27. 2 2
      youtube_dl/extractor/googleplus.py
  28. 37 0
      youtube_dl/extractor/hark.py
  29. 6 2
      youtube_dl/extractor/ign.py
  30. 4 2
      youtube_dl/extractor/kankan.py
  31. 1 1
      youtube_dl/extractor/metacafe.py
  32. 55 0
      youtube_dl/extractor/metacritic.py
  33. 74 0
      youtube_dl/extractor/mit.py
  34. 73 0
      youtube_dl/extractor/naver.py
  35. 33 0
      youtube_dl/extractor/nbc.py
  36. 54 0
      youtube_dl/extractor/orf.py
  37. 42 0
      youtube_dl/extractor/ro220.py
  38. 15 2
      youtube_dl/extractor/rtlnow.py
  39. 90 0
      youtube_dl/extractor/sohu.py
  40. 73 0
      youtube_dl/extractor/trilulilu.py
  41. 1 1
      youtube_dl/extractor/unistra.py
  42. 56 0
      youtube_dl/extractor/veehd.py
  43. 37 11
      youtube_dl/extractor/vimeo.py
  44. 0 1
      youtube_dl/extractor/wat.py
  45. 10 8
      youtube_dl/extractor/xhamster.py
  46. 14 4
      youtube_dl/extractor/youporn.py
  47. 63 26
      youtube_dl/extractor/youtube.py
  48. 57 7
      youtube_dl/utils.py
  49. 1 1
      youtube_dl/version.py

+ 12 - 7
README.md

@@ -113,25 +113,28 @@ which means you can modify it, redistribute it or use it however you like.
 
 ## Video Format Options:
     -f, --format FORMAT        video format code, specifiy the order of
-                               preference using slashes: "-f 22/17/18"
+                               preference using slashes: "-f 22/17/18". "-f mp4"
+                               and "-f flv" are also supported
     --all-formats              download all available video formats
     --prefer-free-formats      prefer free video formats unless a specific one
                                is requested
     --max-quality FORMAT       highest quality format to download
     -F, --list-formats         list all available formats (currently youtube
                                only)
+
+## Subtitle Options:
     --write-sub                write subtitle file (currently youtube only)
     --write-auto-sub           write automatic subtitle file (currently youtube
                                only)
     --only-sub                 [deprecated] alias of --skip-download
     --all-subs                 downloads all the available subtitles of the
-                               video (currently youtube only)
+                               video
     --list-subs                lists all available subtitles for the video
-                               (currently youtube only)
-    --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt)
-                               (currently youtube only)
-    --sub-lang LANG            language of the subtitles to download (optional)
-                               use IETF language tags like 'en'
+    --sub-format FORMAT        subtitle format (default=srt) ([sbv/vtt] youtube
+                               only)
+    --sub-lang LANGS           languages of the subtitles to download (optional)
+                               separated by commas, use IETF language tags like
+                               'en,pt'
 
 ## Authentication Options:
     -u, --username USERNAME    account username
@@ -153,6 +156,8 @@ which means you can modify it, redistribute it or use it however you like.
                                processing; the video is erased by default
     --no-post-overwrites       do not overwrite post-processed files; the post-
                                processed files are overwritten by default
+    --embed-subs               embed subtitles in the video (only for mp4
+                               videos)
 
 # CONFIGURATION
 

+ 5 - 1
devscripts/bash-completion.in

@@ -4,8 +4,12 @@ __youtube-dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
    opts="{{flags}}"
+    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
 
-    if [[ ${cur} == * ]] ; then
+    if [[ ${cur} =~ : ]]; then
+        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+        return 0
+    elif [[ ${cur} == * ]] ; then
         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
         return 0
     fi

+ 18 - 11
devscripts/gh-pages/add-version.py

@@ -6,28 +6,35 @@ import hashlib
 import urllib.request
 
 if len(sys.argv) <= 1:
-	print('Specify the version number as parameter')
-	sys.exit()
+    print('Specify the version number as parameter')
+    sys.exit()
 version = sys.argv[1]
 
 with open('update/LATEST_VERSION', 'w') as f:
-	f.write(version)
+    f.write(version)
 
 versions_info = json.load(open('update/versions.json'))
 if 'signature' in versions_info:
-	del versions_info['signature']
+    del versions_info['signature']
 
 new_version = {}
 
-filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version}
+filenames = {
+    'bin': 'youtube-dl',
+    'exe': 'youtube-dl.exe',
+    'tar': 'youtube-dl-%s.tar.gz' % version}
+build_dir = os.path.join('..', '..', 'build', version)
 for key, filename in filenames.items():
-	print('Downloading and checksumming %s...' %filename)
-	url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename)
-	data = urllib.request.urlopen(url).read()
-	sha256sum = hashlib.sha256(data).hexdigest()
-	new_version[key] = (url, sha256sum)
+    fn = os.path.join(build_dir, filename)
+    with open(fn, 'rb') as f:
+        data = f.read()
+    if not data:
+        raise ValueError('File %s is empty!' % fn)
+    sha256sum = hashlib.sha256(data).hexdigest()
+    new_version[key] = (url, sha256sum)
 
 versions_info['versions'][version] = new_version
 versions_info['latest'] = version
 
-json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
+with open('update/versions.json', 'w') as jsonf:
+    json.dump(versions_info, jsonf, indent=4, sort_keys=True)

+ 1 - 2
devscripts/gh-pages/update-feed.py

@@ -22,7 +22,7 @@ entry_template=textwrap.dedent("""
 									<atom:link href="http://rg3.github.io/youtube-dl" />
 									<atom:content type="xhtml">
 										<div xmlns="http://www.w3.org/1999/xhtml">
-											Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>
+											Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
 										</div>
 									</atom:content>
 									<atom:author>
@@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
 with open('update/releases.atom','w',encoding='utf-8') as atom_file:
 	atom_file.write(atom_template)
 
-

+ 33 - 0
devscripts/gh-pages/update-sites.py

@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import textwrap
+
+# We must be able to import youtube_dl
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+import youtube_dl
+
+def main():
+    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
+        template = tmplf.read()
+
+    ie_htmls = []
+    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
+        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
+        try:
+            ie_html += ': {}'.format(ie.IE_DESC)
+        except AttributeError:
+            pass
+        if ie.working() == False:
+            ie_html += ' (Currently broken)'
+        ie_htmls.append('<li>{}</li>'.format(ie_html))
+
+    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
+
+    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
+        sitesf.write(template)
+
+if __name__ == '__main__':
+    main()

+ 2 - 1
devscripts/release.sh

@@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
 (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
 git checkout HEAD -- youtube-dl youtube-dl.exe
 git checkout HEAD -- youtube-dl youtube-dl.exe
 
 
-/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
+/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
 for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
 for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
 scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
 ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
@@ -85,6 +85,7 @@ ROOT=$(pwd)
     "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
     "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
     "$ROOT/devscripts/gh-pages/generate-download.py"
     "$ROOT/devscripts/gh-pages/generate-download.py"
     "$ROOT/devscripts/gh-pages/update-copyright.py"
     "$ROOT/devscripts/gh-pages/update-copyright.py"
+    "$ROOT/devscripts/gh-pages/update-sites.py"
     git add *.html *.html.in update
     git add *.html *.html.in update
     git commit -m "release $version"
     git commit -m "release $version"
     git show HEAD
     git show HEAD

+ 6 - 6
devscripts/youtube_genalgo.py

@@ -14,21 +14,21 @@ tests = [
     # 89 
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
      "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
-    # 88
+    # 88 - vflapUV9V 2013/08/28
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
-     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
     # 87
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
      "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
-    # 86
+    # 86 - vfluy6kdb 2013/09/06
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
-     "yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
+     "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
     # 85
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
      ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
-    # 84
+    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
-     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
+     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
     # 83
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),

+ 42 - 20
test/test_all_urls.py

@@ -11,24 +11,49 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE,
 from helper import get_testcases
 
 class TestAllURLsMatching(unittest.TestCase):
+    def setUp(self):
+        self.ies = gen_extractors()
+
+    def matching_ies(self, url):
+        return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+    def assertMatch(self, url, ie_list):
+        self.assertEqual(self.matching_ies(url), ie_list)
+
     def test_youtube_playlist_matching(self):
-        self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
-        self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
-        self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
+        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+        assertPlaylist(u'PL63F0C78739B09958')
+        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
 
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
 
     def test_youtube_channel_matching(self):
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
+        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
+
+    def test_youtube_user_matching(self):
+        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+
+    def test_youtube_feeds(self):
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+        self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
+        self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
+        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
+
+    def test_youtube_show_matching(self):
+        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
 
     def test_justin_tv_channelid_matching(self):
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
@@ -63,15 +88,12 @@ class TestAllURLsMatching(unittest.TestCase):
                     self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
 
     def test_keywords(self):
-        ies = gen_extractors()
-        matching_ies = lambda url: [ie.IE_NAME for ie in ies
-                                    if ie.suitable(url) and ie.IE_NAME != 'generic']
-        self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
+        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+        self.assertMatch(':thedailyshow', ['ComedyCentral'])
+        self.assertMatch(':tds', ['ComedyCentral'])
+        self.assertMatch(':colbertreport', ['ComedyCentral'])
+        self.assertMatch(':cr', ['ComedyCentral'])
 
 
 if __name__ == '__main__':

+ 3 - 4
test/test_download.py

@@ -127,12 +127,11 @@ def generator(test_case):
                     info_dict = json.load(infof)
                 for (info_field, expected) in tc.get('info_dict', {}).items():
                     if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                        self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
+                        got = 'md5:' + md5(info_dict.get(info_field))
                     else:
                         got = info_dict.get(info_field)
-                        self.assertEqual(
-                            expected, got,
-                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
                 # If checkable fields are missing from the test case, print the info_dict
                 test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))

+ 17 - 11
youtube_dl/FileDownloader.py

@@ -63,6 +63,17 @@ class FileDownloader(object):
         converted = float(bytes) / float(1024 ** exponent)
         return '%.2f%s' % (converted, suffix)
 
+    @staticmethod
+    def format_seconds(seconds):
+        (mins, secs) = divmod(seconds, 60)
+        (hours, eta_mins) = divmod(mins, 60)
+        if hours > 99:
+            return '--:--:--'
+        if hours == 0:
+            return '%02d:%02d' % (mins, secs)
+        else:
+            return '%02d:%02d:%02d' % (hours, mins, secs)
+
     @staticmethod
     def calc_percent(byte_counter, data_len):
         if data_len is None:
@@ -78,14 +89,7 @@ class FileDownloader(object):
             return '--:--'
         rate = float(current) / dif
         eta = int((float(total) - float(current)) / rate)
-        (eta_mins, eta_secs) = divmod(eta, 60)
-        (eta_hours, eta_mins) = divmod(eta_mins, 60)
-        if eta_hours > 99:
-            return '--:--:--'
-        if eta_hours == 0:
-            return '%02d:%02d' % (eta_mins, eta_secs)
-        else:
-            return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
+        return FileDownloader.format_seconds(eta)
 
     @staticmethod
     def calc_speed(start, now, bytes):
@@ -234,12 +238,14 @@ class FileDownloader(object):
         """Report it was impossible to resume download."""
         self.to_screen(u'[download] Unable to resume')
 
-    def report_finish(self):
+    def report_finish(self, data_len_str, tot_time):
         """Report download finished."""
         if self.params.get('noprogress', False):
             self.to_screen(u'[download] Download completed')
         else:
-            self.to_screen(u'')
+            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
+                (clear_line, data_len_str, self.format_seconds(tot_time)))
 
     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
         self.report_destination(filename)
@@ -542,7 +548,7 @@ class FileDownloader(object):
             self.report_error(u'Did not get any data blocks')
             return False
         stream.close()
-        self.report_finish()
+        self.report_finish(data_len_str, (time.time() - start))
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)

+ 3 - 2
youtube_dl/PostProcessor.py

@@ -137,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         try:
             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
         except FFmpegPostProcessorError as err:
-            raise AudioConversionError(err.message)
+            raise AudioConversionError(err.msg)
 
     def run(self, information):
         path = information['filepath']
@@ -207,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         except:
             etype,e,tb = sys.exc_info()
             if isinstance(e, AudioConversionError):
-                msg = u'audio conversion failed: ' + e.message
+                msg = u'audio conversion failed: ' + e.msg
             else:
                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
             raise PostProcessingError(msg)
@@ -458,6 +458,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         opts.extend(['-f', 'mp4'])
 
         temp_filename = filename + u'.temp'
+        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))

+ 18 - 5
youtube_dl/YoutubeDL.py

@@ -76,7 +76,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
-    subtitleslangs:     Language of the subtitles to download
+    subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
     skip_download:     Skip the actual download of the video file
@@ -97,6 +97,7 @@ class YoutubeDL(object):
     def __init__(self, params):
         """Create a FileDownloader object with the given options."""
         self._ies = []
+        self._ies_instances = {}
         self._pps = []
         self._progress_hooks = []
         self._download_retcode = 0
@@ -111,8 +112,21 @@ class YoutubeDL(object):
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
         self._ies.append(ie)
+        self._ies_instances[ie.ie_key()] = ie
         ie.set_downloader(self)
 
+    def get_info_extractor(self, ie_key):
+        """
+        Get an instance of an IE with name ie_key, it will try to get one from
+        the _ies list, if there's no instance it will create a new one and add
+        it to the extractor list.
+        """
+        ie = self._ies_instances.get(ie_key)
+        if ie is None:
+            ie = get_info_extractor(ie_key)()
+            self.add_info_extractor(ie)
+        return ie
+
     def add_default_info_extractors(self):
         """
         Add the InfoExtractors returned by gen_extractors to the end of the list
@@ -294,9 +308,7 @@ class YoutubeDL(object):
          '''
         
         if ie_key:
-            ie = get_info_extractor(ie_key)()
-            ie.set_downloader(self)
-            ies = [ie]
+            ies = [self.get_info_extractor(ie_key)]
         else:
             ies = self._ies
 
@@ -448,7 +460,8 @@ class YoutubeDL(object):
         if self.params.get('forceid', False):
             compat_print(info_dict['id'])
         if self.params.get('forceurl', False):
-            compat_print(info_dict['url'])
+            # For RTMP URLs, also include the playpath
+            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
             compat_print(info_dict['thumbnail'])
         if self.params.get('forcedescription', False) and 'description' in info_dict:

+ 21 - 5
youtube_dl/__init__.py

@@ -28,6 +28,7 @@ __authors__  = (
     'Axel Noack',
     'Axel Noack',
     'Albert Kim',
     'Albert Kim',
     'Pierre Rudloff',
     'Pierre Rudloff',
+    'Huarong Huo',
 )
 )
 
 
 __license__ = 'Public Domain'
 __license__ = 'Public Domain'
@@ -45,6 +46,7 @@ import sys
 import warnings
 import warnings
 import platform
 import platform
 
 
+
 from .utils import *
 from .utils import *
 from .update import update_self
 from .update import update_self
 from .version import __version__
 from .version import __version__
@@ -99,6 +101,16 @@ def parseOpts(overrideArguments=None):
             pass
             pass
         return None
         return None
 
 
+    def _hide_login_info(opts):
+        opts = list(opts)
+        for private_opt in ['-p', '--password', '-u', '--username']:
+            try:
+                i = opts.index(private_opt)
+                opts[i+1] = '<PRIVATE>'
+            except ValueError:
+                pass
+        return opts
+
     max_width = 80
     max_width = 80
     max_help_position = 80
     max_help_position = 80
 
 
@@ -181,7 +193,7 @@ def parseOpts(overrideArguments=None):
 
 
     video_format.add_option('-f', '--format',
     video_format.add_option('-f', '--format',
             action='store', dest='format', metavar='FORMAT',
             action='store', dest='format', metavar='FORMAT',
-            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
+            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
     video_format.add_option('--all-formats',
     video_format.add_option('--all-formats',
             action='store_const', dest='format', help='download all available video formats', const='all')
             action='store_const', dest='format', help='download all available video formats', const='all')
     video_format.add_option('--prefer-free-formats',
     video_format.add_option('--prefer-free-formats',
@@ -354,9 +366,9 @@ def parseOpts(overrideArguments=None):
         argv = systemConf + userConf + commandLineConf
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         opts, args = parser.parse_args(argv)
         if opts.verbose:
         if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
+            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
 
 
     return parser, opts, args
     return parser, opts, args
 
 
@@ -427,6 +439,10 @@ def _real_main(argv=None):
     proxy_handler = compat_urllib_request.ProxyHandler(proxies)
     proxy_handler = compat_urllib_request.ProxyHandler(proxies)
     https_handler = make_HTTPS_handler(opts)
     https_handler = make_HTTPS_handler(opts)
     opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
     opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+    # Delete the default user-agent header, which would otherwise apply in
+    # cases where our custom HTTP handler doesn't come into play
+    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+    opener.addheaders =[]
     compat_urllib_request.install_opener(opener)
     compat_urllib_request.install_opener(opener)
     socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
     socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
 
 
@@ -604,7 +620,7 @@ def _real_main(argv=None):
                 sys.exc_clear()
                 sys.exc_clear()
             except:
             except:
                 pass
                 pass
-        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
+        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
         sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
         sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
 
 
     ydl.add_default_info_extractors()
     ydl.add_default_info_extractors()

+ 202 - 0
youtube_dl/aes.py

@@ -0,0 +1,202 @@
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
+
+import base64
+from math import ceil
+
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+BLOCK_SIZE_BYTES = 16
+
+def aes_ctr_decrypt(data, key, counter):
+    """
+    Decrypt with aes in counter mode
+    
+    @param {int[]} data        cipher
+    @param {int[]} key         16/24/32-Byte cipher key
+    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
+                               returns the next counter block
+    @returns {int[]}           decrypted data
+    """
+    expanded_key = key_expansion(key)
+    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    
+    decrypted_data=[]
+    for i in range(block_count):
+        counter_block = counter.next_value()
+        block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
+        block += [0]*(BLOCK_SIZE_BYTES - len(block))
+        
+        cipher_counter_block = aes_encrypt(counter_block, expanded_key)
+        decrypted_data += xor(block, cipher_counter_block)
+    decrypted_data = decrypted_data[:len(data)]
+    
+    return decrypted_data
+
+def key_expansion(data):
+    """
+    Generate key schedule
+    
+    @param {int[]} data  16/24/32-Byte cipher key
+    @returns {int[]}     176/208/240-Byte expanded key 
+    """
+    data = data[:] # copy
+    rcon_iteration = 1
+    key_size_bytes = len(data)
+    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+    
+    while len(data) < expanded_key_size_bytes:
+        temp = data[-4:]
+        temp = key_schedule_core(temp, rcon_iteration)
+        rcon_iteration += 1
+        data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        for _ in range(3):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        if key_size_bytes == 32:
+            temp = data[-4:]
+            temp = sub_bytes(temp)
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        for _ in range(3 if key_size_bytes == 32  else 2 if key_size_bytes == 24 else 0):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+    data = data[:expanded_key_size_bytes]
+    
+    return data
+
+def aes_encrypt(data, expanded_key):
+    """
+    Encrypt one block with aes
+    
+    @param {int[]} data          16-Byte state
+    @param {int[]} expanded_key  176/208/240-Byte expanded key 
+    @returns {int[]}             16-Byte cipher
+    """
+    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+    
+    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+    for i in range(1, rounds+1):
+        data = sub_bytes(data)
+        data = shift_rows(data)
+        if i != rounds:
+            data = mix_columns(data)
+        data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+    
+    return data
+
+def aes_decrypt_text(data, password, key_size_bytes):
+    """
+    Decrypt text
+    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
+      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
+    - Mode of operation is 'counter'
+    
+    @param {str} data                    Base64 encoded string
+    @param {str,unicode} password        Password (will be encoded with utf-8)
+    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+    @returns {str}                       Decrypted data
+    """
+    NONCE_LENGTH_BYTES = 8
+    
+    data = bytes_to_intlist(base64.b64decode(data))
+    password = bytes_to_intlist(password.encode('utf-8'))
+    
+    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
+    key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
+    
+    nonce = data[:NONCE_LENGTH_BYTES]
+    cipher = data[NONCE_LENGTH_BYTES:]
+    
+    class Counter:
+        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+        def next_value(self):
+            temp = self.__value
+            self.__value = inc(self.__value)
+            return temp
+    
+    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+    plaintext = intlist_to_bytes(decrypted_data)
+    
+    return plaintext
+
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+MIX_COLUMN_MATRIX = ((2,3,1,1),
+                     (1,2,3,1),
+                     (1,1,2,3),
+                     (3,1,1,2))
+
+def sub_bytes(data):
+    return [SBOX[x] for x in data]
+
+def rotate(data):
+    return data[1:] + [data[0]]
+
+def key_schedule_core(data, rcon_iteration):
+    data = rotate(data)
+    data = sub_bytes(data)
+    data[0] = data[0] ^ RCON[rcon_iteration]
+    
+    return data
+
+def xor(data1, data2):
+    return [x^y for x, y in zip(data1, data2)]
+
+def mix_column(data):
+    data_mixed = []
+    for row in range(4):
+        mixed = 0
+        for column in range(4):
+            addend = data[column]
+            if MIX_COLUMN_MATRIX[row][column] in (2,3):
+                addend <<= 1
+                if addend > 0xff:
+                    addend &= 0xff
+                    addend ^= 0x1b
+                if MIX_COLUMN_MATRIX[row][column] == 3:
+                    addend ^= data[column]
+            mixed ^= addend & 0xff
+        data_mixed.append(mixed)
+    return data_mixed
+
+def mix_columns(data):
+    data_mixed = []
+    for i in range(4):
+        column = data[i*4 : (i+1)*4]
+        data_mixed += mix_column(column)
+    return data_mixed
+
+def shift_rows(data):
+    data_shifted = []
+    for column in range(4):
+        for row in range(4):
+            data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
+    return data_shifted
+
+def inc(data):
+    data = data[:] # copy
+    for i in range(len(data)-1,-1,-1):
+        if data[i] == 255:
+            data[i] = 0
+        else:
+            data[i] = data[i] + 1
+            break
+    return data

+ 20 - 2
youtube_dl/extractor/__init__.py

@@ -1,3 +1,5 @@
+from .appletrailers import AppleTrailersIE
+from .addanime import AddAnimeIE
 from .archiveorg import ArchiveOrgIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .ard import ARDIE
 from .arte import ArteTvIE
 from .arte import ArteTvIE
@@ -6,16 +8,21 @@ from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .brightcove import BrightcoveIE
+from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
 from .comedycentral import ComedyCentralIE
 from .condenast import CondeNastIE
 from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE, DailymotionPlaylistIE
 from .dailymotion import DailymotionIE, DailymotionPlaylistIE
+from .daum import DaumIE
 from .depositfiles import DepositFilesIE
 from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .dreisat import DreiSatIE
+from .defense import DefenseGouvFrIE
 from .ehow import EHowIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .escapist import EscapistIE
@@ -29,6 +36,7 @@ from .gametrailers import GametrailersIE
 from .generic import GenericIE
 from .generic import GenericIE
 from .googleplus import GooglePlusIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .googlesearch import GoogleSearchIE
+from .hark import HarkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
 from .hypem import HypemIE
@@ -44,23 +52,30 @@ from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
 from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
 from .mtv import MTVIE
 from .muzu import MuzuTVIE
 from .muzu import MuzuTVIE
 from .myspass import MySpassIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvideo import MyVideoIE
+from .naver import NaverIE
 from .nba import NBAIE
 from .nba import NBAIE
+from .nbc import NBCNewsIE
 from .ooyala import OoyalaIE
 from .ooyala import OoyalaIE
+from .orf import ORFIE
 from .pbs import PBSIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .photobucket import PhotobucketIE
 from .pornotube import PornotubeIE
 from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
 from .ringtv import RingTVIE
+from .ro220 import Ro220IE
 from .roxwel import RoxwelIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .rtlnow import RTLnowIE
 from .sina import SinaIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slashdot import SlashdotIE
+from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .stanfordoc import StanfordOpenClassroomIE
@@ -71,18 +86,19 @@ from .ted import TEDIE
 from .tf1 import TF1IE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
 from .tudou import TudouIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .tutv import TutvIE
-from .ustream import UstreamIE
 from .unistra import UnistraIE
 from .unistra import UnistraIE
+from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .vine import VineIE
-from .c56 import C56IE
 from .wat import WatIE
 from .wat import WatIE
 from .weibo import WeiboIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wimp import WimpIE
@@ -116,12 +132,14 @@ _ALL_CLASSES = [
 ]
 ]
 _ALL_CLASSES.append(GenericIE)
 _ALL_CLASSES.append(GenericIE)
 
 
+
 def gen_extractors():
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
     The order does matter; the first extractor matched is the one handling the URL.
     """
     """
     return [klass() for klass in _ALL_CLASSES]
     return [klass() for klass in _ALL_CLASSES]
 
 
+
 def get_info_extractor(ie_name):
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
     """Returns the info extractor class with the given ie_name"""
     return globals()[ie_name+'IE']
     return globals()[ie_name+'IE']

+ 75 - 0
youtube_dl/extractor/addanime.py

@@ -0,0 +1,75 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_HTTPError,
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_parse_urlparse,
+
+    ExtractorError,
+)
+
+
+class AddAnimeIE(InfoExtractor):
+
+    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+    IE_NAME = u'AddAnime'
+    _TEST = {
+        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
+        u'file': u'24MR3YO5SAS9.flv',
+        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
+        u'info_dict': {
+            u"description": u"One Piece 606",
+            u"title": u"One Piece 606"
+        }
+    }
+
+    def _real_extract(self, url):
+        try:
+            mobj = re.match(self._VALID_URL, url)
+            video_id = mobj.group('video_id')
+            webpage = self._download_webpage(url, video_id)
+        except ExtractorError as ee:
+            if not isinstance(ee.cause, compat_HTTPError):
+                raise
+
+            redir_webpage = ee.cause.read().decode('utf-8')
+            action = self._search_regex(
+                r'<form id="challenge-form" action="([^"]+)"',
+                redir_webpage, u'Redirect form')
+            vc = self._search_regex(
+                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
+                redir_webpage, u'redirect vc value')
+            av = re.search(
+                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
+                redir_webpage)
+            if av is None:
+                raise ExtractorError(u'Cannot find redirect math task')
+            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
+
+            parsed_url = compat_urllib_parse_urlparse(url)
+            av_val = av_res + len(parsed_url.netloc)
+            confirm_url = (
+                parsed_url.scheme + u'://' + parsed_url.netloc +
+                action + '?' +
+                compat_urllib_parse.urlencode({
+                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
+            self._download_webpage(
+                confirm_url, video_id,
+                note=u'Confirming after redirect')
+            webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
+                                       webpage, u'video file URL')
+        video_title = self._og_search_title(webpage)
+        video_description = self._og_search_description(webpage)
+
+        return {
+            '_type': 'video',
+            'id':  video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': video_title,
+            'description': video_description
+        }

+ 166 - 0
youtube_dl/extractor/appletrailers.py

@@ -0,0 +1,166 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+)
+
+
+class AppleTrailersIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TEST = {
+        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
+        u"playlist": [
+            {
+                u"file": u"manofsteel-trailer4.mov",
+                u"md5": u"11874af099d480cc09e103b189805d5f",
+                u"info_dict": {
+                    u"duration": 111,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
+                    u"title": u"Trailer 4",
+                    u"upload_date": u"20130523",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-trailer3.mov",
+                u"md5": u"07a0a262aae5afe68120eed61137ab34",
+                u"info_dict": {
+                    u"duration": 182,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
+                    u"title": u"Trailer 3",
+                    u"upload_date": u"20130417",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-trailer.mov",
+                u"md5": u"e401fde0813008e3307e54b6f384cff1",
+                u"info_dict": {
+                    u"duration": 148,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
+                    u"title": u"Trailer",
+                    u"upload_date": u"20121212",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-teaser.mov",
+                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+                u"info_dict": {
+                    u"duration": 93,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
+                    u"title": u"Teaser",
+                    u"upload_date": u"20120721",
+                    u"uploader_id": u"wb",
+                },
+            }
+        ]
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        movie = mobj.group('movie')
+        uploader_id = mobj.group('company')
+
+        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+        playlist_snippet = self._download_webpage(playlist_url, movie)
+        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
+        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+
+        size_cache = {}
+
+        doc = xml.etree.ElementTree.fromstring(playlist_html)
+        playlist = []
+        for li in doc.findall('./div/ul/li'):
+            title = li.find('.//h3').text
+            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+            thumbnail = li.find('.//img').attrib['src']
+
+            date_el = li.find('.//p')
+            upload_date = None
+            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
+            if m:
+                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
+            runtime_el = date_el.find('./br')
+            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+            duration = None
+            if m:
+                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+
+            formats = []
+            for formats_el in li.findall('.//a'):
+                if formats_el.attrib['class'] != 'OverlayPanel':
+                    continue
+                target = formats_el.attrib['target']
+
+                format_code = formats_el.text
+                if 'Automatic' in format_code:
+                    continue
+
+                size_q = formats_el.attrib['href']
+                size_id = size_q.rpartition('#videos-')[2]
+                if size_id not in size_cache:
+                    size_url = url + size_q
+                    sizepage_html = self._download_webpage(
+                        size_url, movie,
+                        note=u'Downloading size info %s' % size_id,
+                        errnote=u'Error while downloading size info %s' % size_id,
+                    )
+                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
+                    size_cache[size_id] = _doc
+
+                sizepage_doc = size_cache[size_id]
+                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
+                for vid_a in links:
+                    href = vid_a.get('href')
+                    if not href.endswith(target):
+                        continue
+                    detail_q = href.partition('#')[0]
+                    detail_url = url + '/' + detail_q
+
+                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
+                    detail_id = m.group('detail_id')
+
+                    detail_html = self._download_webpage(
+                        detail_url, movie,
+                        note=u'Downloading detail %s %s' % (detail_id, size_id),
+                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
+                    )
+                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
+                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
+                    assert movie_link_el.get('class') == 'movieLink'
+                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
+                    ext = determine_ext(movie_link)
+                    assert ext == 'mov'
+
+                    formats.append({
+                        'format': format_code,
+                        'ext': ext,
+                        'url': movie_link,
+                    })
+
+            info = {
+                '_type': 'video',
+                'id': video_id,
+                'title': title,
+                'formats': formats,
+                'title': title,
+                'duration': duration,
+                'thumbnail': thumbnail,
+                'upload_date': upload_date,
+                'uploader_id': uploader_id,
+                'user_agent': 'QuickTime compatible (youtube-dl)',
+            }
+            # TODO: Remove when #980 has been merged
+            info['url'] = formats[-1]['url']
+            info['ext'] = formats[-1]['ext']
+
+            playlist.append(info)
+
+        return {
+            '_type': 'playlist',
+            'id': movie,
+            'entries': playlist,
+        }

+ 2 - 2
youtube_dl/extractor/c56.py

@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
 
 
     _TEST ={
     _TEST ={
         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        u'file': u'93440716.mp4',
-        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'file': u'93440716.flv',
+        u'md5': u'e59995ac63d0457783ea05f93f12a866',
         u'info_dict': {
         u'info_dict': {
             u'title': u'网事知多少 第32期:车怒',
             u'title': u'网事知多少 第32期:车怒',
         },
         },

+ 35 - 0
youtube_dl/extractor/canalc2.py

@@ -0,0 +1,35 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+
+
+class Canalc2IE(InfoExtractor):
+    _IE_NAME = 'canalc2.tv'
+    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
+
+    _TEST = {
+        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        u'file': u'12163.mp4',
+        u'md5': u'060158428b650f896c542dfbb3d6487f',
+        u'info_dict': {
+            u'title': u'Terrasses du Numérique'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, video_id)
+        file_name = self._search_regex(
+            r"so\.addVariable\('file','(.*?)'\);",
+            webpage, 'file name')
+        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+
+        title = self._html_search_regex(
+            r'class="evenement8">(.*?)</a>', webpage, u'title')
+        
+        return {'id': video_id,
+                'ext': 'mp4',
+                'url': video_url,
+                'title': title,
+                }

+ 1 - 1
youtube_dl/extractor/canalplus.py

@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import unified_strdate
 from ..utils import unified_strdate
 
 
 class CanalplusIE(InfoExtractor):
 class CanalplusIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     IE_NAME = u'canalplus.fr'
     IE_NAME = u'canalplus.fr'
 
 

+ 58 - 0
youtube_dl/extractor/cnn.py

@@ -0,0 +1,58 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
class CNNIE(InfoExtractor):
    """Extractor for video pages on cnn.com / edition.cnn.com."""

    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''

    _TESTS = [{
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        page_title = mobj.group('title')
        # Video metadata is published as XML under /video/data/3.0/<path>/index.xml
        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
        info_xml = self._download_webpage(info_url, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        # Collect (width, height, bitrate, path) tuples; after sorting, the
        # best-quality variant is the last entry.
        formats = []
        for file_el in info.findall('files/file'):
            m = re.match(r'(\d+)x(\d+)(?:_(.*)k)?', file_el.attrib['bitrate'])
            if m is None:
                continue
            formats.append(
                (int(m.group(1)), int(m.group(2)), int(m.group(3) or 0), file_el.text))
        formats.sort()
        video_path = formats[-1][3]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path

        # Thumbnails ordered by (height, width); the largest one becomes the
        # main thumbnail.
        thumbnails = sorted(
            ((int(t.attrib['height']), int(t.attrib['width'])), t.text)
            for t in info.findall('images/image'))
        thumbs_dict = [{'resolution': res, 'url': t_url}
                       for (res, t_url) in thumbnails]

        return {
            'id': info.attrib['id'],
            'title': info.find('headline').text,
            'url': video_url,
            'ext': determine_ext(video_url),
            'thumbnail': thumbnails[-1][1],
            'thumbnails': thumbs_dict,
            'description': info.find('description').text,
        }

+ 13 - 3
youtube_dl/extractor/common.py

@@ -114,6 +114,11 @@ class InfoExtractor(object):
         """Real extraction process. Redefine in subclasses."""
         """Real extraction process. Redefine in subclasses."""
         pass
         pass
 
 
+    @classmethod
+    def ie_key(cls):
+        """A string for getting the InfoExtractor with get_info_extractor"""
+        return cls.__name__[:-2]
+
     @property
     @property
     def IE_NAME(self):
     def IE_NAME(self):
         return type(self).__name__[:-2]
         return type(self).__name__[:-2]
@@ -129,7 +134,7 @@ class InfoExtractor(object):
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             if errnote is None:
             if errnote is None:
                 errnote = u'Unable to download webpage'
                 errnote = u'Unable to download webpage'
-            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
+            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
 
 
     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
         """ Returns a tuple (page content as string, URL handle) """
         """ Returns a tuple (page content as string, URL handle) """
@@ -140,12 +145,17 @@ class InfoExtractor(object):
 
 
         urlh = self._request_webpage(url_or_request, video_id, note, errnote)
         urlh = self._request_webpage(url_or_request, video_id, note, errnote)
         content_type = urlh.headers.get('Content-Type', '')
         content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
         if m:
             encoding = m.group(1)
             encoding = m.group(1)
         else:
         else:
-            encoding = 'utf-8'
-        webpage_bytes = urlh.read()
+            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+                          webpage_bytes[:1024])
+            if m:
+                encoding = m.group(1).decode('ascii')
+            else:
+                encoding = 'utf-8'
         if self._downloader.params.get('dump_intermediate_pages', False):
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
             try:
                 url = url_or_request.get_full_url()
                 url = url_or_request.get_full_url()

+ 5 - 3
youtube_dl/extractor/dailymotion.py

@@ -37,14 +37,14 @@ class DailyMotionSubtitlesIE(NoAutoSubtitlesIE):
 class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
 class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
     """Information Extractor for Dailymotion"""
     """Information Extractor for Dailymotion"""
 
 
-    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
+    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
     IE_NAME = u'dailymotion'
     IE_NAME = u'dailymotion'
     _TEST = {
     _TEST = {
         u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
         u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
         u'file': u'x33vw9.mp4',
         u'file': u'x33vw9.mp4',
         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
         u'info_dict': {
         u'info_dict': {
-            u"uploader": u"Alex and Van .", 
+            u"uploader": u"Amphora Alex and Van .", 
             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
         }
         }
     }
     }
@@ -56,6 +56,7 @@ class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
         video_id = mobj.group(1).split('_')[0].split('?')[0]
         video_id = mobj.group(1).split('_')[0].split('?')[0]
 
 
         video_extension = 'mp4'
         video_extension = 'mp4'
+        url = 'http://www.dailymotion.com/video/%s' % video_id
 
 
         # Retrieve video webpage to extract further information
         # Retrieve video webpage to extract further information
         request = compat_urllib_request.Request(url)
         request = compat_urllib_request.Request(url)
@@ -78,7 +79,8 @@ class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
         embed_page = self._download_webpage(embed_url, video_id,
         embed_page = self._download_webpage(embed_url, video_id,
                                             u'Downloading embed page')
                                             u'Downloading embed page')
-        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
+        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
+            'video info', flags=re.MULTILINE)
         info = json.loads(info)
         info = json.loads(info)
 
 
         # TODO: support choosing qualities
         # TODO: support choosing qualities

+ 74 - 0
youtube_dl/extractor/daum.py

@@ -0,0 +1,74 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    determine_ext,
+)
+
+
class DaumIE(InfoExtractor):
    """Extractor for clips on tvpot.daum.net (Korean video portal)."""

    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group(1)
        # All clip pages have a canonical short form at /v/<id>.
        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
        page = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(
            r'<link rel="video_src" href=".+?vid=(.+?)"',
            page, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        clip_info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        movie_data = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
        # Each encoding profile needs a separate MovieLocation request to
        # resolve its actual download URL.
        for output_el in movie_data.findall('result/output_list/output_list'):
            profile = output_el.attrib['profile']
            profile_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            location_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + profile_query,
                video_id, note=False)
            location_doc = xml.etree.ElementTree.fromstring(location_xml.encode('utf-8'))
            format_url = location_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })

        result = {
            'id': video_id,
            'title': clip_info.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(page),
            'description': clip_info.find('CONTENTS').text,
            'duration': int(clip_info.find('DURATION').text),
            'upload_date': clip_info.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        result.update(formats[-1])
        return result

+ 39 - 0
youtube_dl/extractor/defense.py

@@ -0,0 +1,39 @@
+import re
+import json
+
+from .common import InfoExtractor
+
+
class DefenseGouvFrIE(InfoExtractor):
    """Extractor for the web TV of the French Ministry of Defense.

    Fix: the original declared `_IE_NAME`, which nothing reads — the base
    class `IE_NAME` property then falls back to the class name, so the
    intended name 'defense.gouv.fr' was silently ignored.  Other extractors
    in this tree set `IE_NAME` directly.
    """
    IE_NAME = 'defense.gouv.fr'
    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
                  r'ligthboxvideo/base-de-medias/webtv/(.*)')

    _TEST = {
        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
        u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
        u'file': u'11213.mp4',
        u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
        "info_dict": {
            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
        }
    }

    def _real_extract(self, url):
        # The URL slug doubles as the video title.
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        # The numeric id is injected into the player via flashvars.pvg_id.
        video_id = self._search_regex(
            r"flashvars.pvg_id=\"(\d+)\";",
            webpage, 'ID')

        # Media locations are published as Brightcove JSON exports.
        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
                    + video_id)
        info = self._download_webpage(json_url, title,
                                      'Downloading JSON config')
        video_url = json.loads(info)['renditions'][0]['url']

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': title,
        }

+ 13 - 5
youtube_dl/extractor/generic.py

@@ -8,11 +8,13 @@ from ..utils import (
     compat_urllib_error,
     compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_parse,
     compat_urllib_request,
     compat_urllib_request,
+    compat_urlparse,
 
 
     ExtractorError,
     ExtractorError,
 )
 )
 from .brightcove import BrightcoveIE
 from .brightcove import BrightcoveIE
 
 
+
 class GenericIE(InfoExtractor):
 class GenericIE(InfoExtractor):
     IE_DESC = u'Generic downloader that works on some sites'
     IE_DESC = u'Generic downloader that works on some sites'
     _VALID_URL = r'.*'
     _VALID_URL = r'.*'
@@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
             u'file': u'13601338388002.mp4',
             u'file': u'13601338388002.mp4',
             u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
             u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
             u'info_dict': {
             u'info_dict': {
-                u"uploader": u"www.hodiho.fr", 
+                u"uploader": u"www.hodiho.fr",
                 u"title": u"R\u00e9gis plante sa Jeep"
                 u"title": u"R\u00e9gis plante sa Jeep"
             }
             }
         },
         },
@@ -107,6 +109,11 @@ class GenericIE(InfoExtractor):
         return new_url
         return new_url
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
+        parsed_url = compat_urlparse.urlparse(url)
+        if not parsed_url.scheme:
+            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
+            return self.url_result('http://' + url)
+
         try:
         try:
             new_url = self._test_redirect(url)
             new_url = self._test_redirect(url)
             if new_url:
             if new_url:
@@ -124,7 +131,7 @@ class GenericIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
         self.report_extraction(video_id)
         self.report_extraction(video_id)
-        # Look for BrigthCove:
+        # Look for BrightCove:
         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
         if m_brightcove is not None:
         if m_brightcove is not None:
             self.to_screen(u'Brightcove video detected.')
             self.to_screen(u'Brightcove video detected.')
@@ -151,7 +158,7 @@ class GenericIE(InfoExtractor):
                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
         if mobj is None:
         if mobj is None:
             # HTML5 video
             # HTML5 video
-            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
+            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
         if mobj is None:
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
@@ -160,8 +167,9 @@ class GenericIE(InfoExtractor):
         if mobj.group(1) is None:
         if mobj.group(1) is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
-        video_url = compat_urllib_parse.unquote(mobj.group(1))
-        video_id = os.path.basename(video_url)
+        video_url = mobj.group(1)
+        video_url = compat_urlparse.urljoin(url, video_url)
+        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
 
 
         # here's a fun little line of code for you:
         # here's a fun little line of code for you:
         video_extension = os.path.splitext(video_id)[1][1:]
         video_extension = os.path.splitext(video_id)[1][1:]

+ 2 - 2
youtube_dl/extractor/googleplus.py

@@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
             webpage, 'title', default=u'NA')
             webpage, 'title', default=u'NA')
 
 
         # Step 2, Simulate clicking the image box to launch video
         # Step 2, Simulate clicking the image box to launch video
-        DOMAIN = 'https://plus.google.com'
-        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
+        DOMAIN = 'https://plus.google.com/'
+        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
             webpage, u'video page URL')
             webpage, u'video page URL')
         if not video_page.startswith(DOMAIN):
         if not video_page.startswith(DOMAIN):
             video_page = DOMAIN + video_page
             video_page = DOMAIN + video_page

+ 37 - 0
youtube_dl/extractor/hark.py

@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
class HarkIE(InfoExtractor):
    """Extractor for audio clips hosted on hark.com."""

    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
            u'duration': 11,
        }
    }

    def _real_extract(self, url):
        clip_id = re.match(self._VALID_URL, url).group(1)
        # Every clip exposes its metadata as JSON at /clips/<id>.json
        json_url = "http://www.hark.com/clips/%s.json" % (clip_id)
        info = json.loads(self._download_webpage(json_url, clip_id))
        clip_url = info['url']

        return {
            'id': clip_id,
            'url': clip_url,
            'title': info['name'],
            'ext': determine_ext(clip_url),
            'description': info['description'],
            'thumbnail': info['image_original'],
            'duration': info['duration'],
        }

+ 6 - 2
youtube_dl/extractor/ign.py

@@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
     Some videos of it.ign.com are also supported
     Some videos of it.ign.com are also supported
     """
     """
 
 
-    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
     IE_NAME = u'ign.com'
     IE_NAME = u'ign.com'
 
 
     _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
     _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
         name_or_id = mobj.group('name_or_id')
         name_or_id = mobj.group('name_or_id')
+        page_type = mobj.group('type')
         webpage = self._download_webpage(url, name_or_id)
         webpage = self._download_webpage(url, name_or_id)
+        if page_type == 'articles':
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            return self.url_result(video_url, ie='IGN')
         video_id = self._find_video_id(webpage)
         video_id = self._find_video_id(webpage)
         result = self._get_video_info(video_id)
         result = self._get_video_info(video_id)
         description = self._html_search_regex(self._DESCRIPTION_RE,
         description = self._html_search_regex(self._DESCRIPTION_RE,
@@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
 class OneUPIE(IGNIE):
 class OneUPIE(IGNIE):
     """Extractor for 1up.com, it uses the ign videos system."""
     """Extractor for 1up.com, it uses the ign videos system."""
 
 
-    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
     IE_NAME = '1up.com'
     IE_NAME = '1up.com'
 
 
     _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
     _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

+ 4 - 2
youtube_dl/extractor/kankan.py

@@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
         video_id = mobj.group('id')
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
 
 
-        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
-        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
+        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
+        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
+        gcid = gcids[-1]
 
 
         video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
         video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
                                                  video_id, u'Downloading video url info')
                                                  video_id, u'Downloading video url info')

+ 1 - 1
youtube_dl/extractor/metacafe.py

@@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
         description = self._og_search_description(webpage)
         description = self._og_search_description(webpage)
         video_uploader = self._html_search_regex(
         video_uploader = self._html_search_regex(
-                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
+                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                 webpage, u'uploader nickname', fatal=False)
                 webpage, u'uploader nickname', fatal=False)
 
 
         return {
         return {

+ 55 - 0
youtube_dl/extractor/metacritic.py

@@ -0,0 +1,55 @@
+import re
+import xml.etree.ElementTree
+import operator
+
+from .common import InfoExtractor
+
+
class MetacriticIE(InfoExtractor):
    """Extractor for trailer pages on metacritic.com."""

    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
        info_xml = self._download_webpage(
            'http://www.metacritic.com/video_data?video=' + video_id,
            video_id, u'Downloading info xml').replace('&', '&amp;')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        # The playlist may contain several clips; pick the requested one.
        clip = next(c for c in info.findall('playList/clip')
                    if c.find('id').text == video_id)
        formats = []
        for video_file in clip.findall('httpURI/videoFile'):
            rate_str = video_file.find('rate').text
            formats.append({
                'url': video_file.find('filePath').text,
                'ext': 'mp4',
                'format_id': rate_str,
                'rate': int(rate_str),
            })
        # Ascending bitrate order, so the best format ends up last.
        formats.sort(key=operator.itemgetter('rate'))

        description = self._html_search_regex(
            r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)

        result = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        result.update(formats[-1])
        return result

+ 74 - 0
youtube_dl/extractor/mit.py

@@ -0,0 +1,74 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    get_element_by_id,
+)
+
+
class TechTVMITIE(InfoExtractor):
    """Extractor for techtv.mit.edu video and embed pages."""
    IE_NAME = u'techtv.mit.edu'
    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        raw_page = self._download_webpage(
            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
        # Strip HTML comments before looking elements up by id.
        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)

        base_url = self._search_regex(
            r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, u'base url')
        formats_json = self._search_regex(
            r'bitrates: (\[.+?\])', raw_page, u'video formats')
        # Ascending bitrate order; the last entry is the best quality.
        formats = sorted(json.loads(formats_json),
                         key=lambda fmt: fmt['bitrate'])

        title = get_element_by_id('edit-title', clean_page)
        description = clean_html(get_element_by_id('edit-description', clean_page))
        thumbnail = self._search_regex(
            r'playlist:.*?url: \'(.+?)\'', raw_page, u'thumbnail',
            flags=re.DOTALL)

        return {
            'id': video_id,
            'title': title,
            # Strip the RTMP-style 'mp4:' prefix from the relative path.
            'url': base_url + formats[-1]['url'].replace('mp4:', ''),
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }
+
+
class MITIE(TechTVMITIE):
    """Extractor for video.mit.edu pages, which embed techtv.mit.edu players."""
    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)
        self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
        embed_url = self._search_regex(
            r'<iframe .*?src="(.+?)"', webpage, u'embed url')
        # Hand the embedded player URL off to the TechTVMIT extractor.
        return self.url_result(embed_url, ie='TechTVMIT')

+ 73 - 0
youtube_dl/extractor/naver.py

@@ -0,0 +1,73 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    ExtractorError,
+)
+
+
class NaverIE(InfoExtractor):
    """Extractor for videos on tvcast.naver.com."""

    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
        u'file': u'81652.mp4',
        u'info_dict': {
            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            u'upload_date': u'20130903',
        },
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)
        # The player is instantiated with the internal vid and an access key.
        player_match = re.search(
            r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
            webpage)
        if player_match is None:
            raise ExtractorError(u'couldn\'t extract vid and key')
        vid = player_match.group(1)
        key = player_match.group(2)
        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
        query_urls = compat_urllib_parse.urlencode({
            'masterVid': vid,
            'protocol': 'p2p',
            'inKey': key,
        })
        info_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, u'Downloading video formats info')
        video_info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        encodings = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for option in encodings.findall('EncodingOptions/EncodingOption'):
            domain = option.find('Domain').text
            # Keep only plain HTTP variants; RTMP domains are skipped.
            if domain.startswith('rtmp'):
                continue
            formats.append({
                'url': domain + option.find('uri').text,
                'ext': 'mp4',
                'width': int(option.find('width').text),
                'height': int(option.find('height').text),
            })

        result = {
            'id': video_id,
            'title': video_info.find('Subject').text,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': video_info.find('WriteDate').text.replace('.', ''),
            'view_count': int(video_info.find('PlayCount').text),
        }
        # TODO: Remove when #980 has been merged
        result.update(formats[-1])
        return result

+ 33 - 0
youtube_dl/extractor/nbc.py

@@ -0,0 +1,33 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import find_xpath_attr, compat_str
+
+
class NBCNewsIE(InfoExtractor):
    """Extractor for video pages on nbcnews.com."""
    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
        u'file': u'52753292.flv',
        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
        u'info_dict': {
            u'title': u'Crew emerges after four-month Mars food study',
            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }

    def _real_extract(self, url):
        """Fetch the metadata XML for the video id and build the info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        # displaymode/1219 serves the raw metadata XML for this id.
        info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
        video_node = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')

        result = {
            'id': video_id,
            'ext': 'flv',
            'title': video_node.find('headline').text,
            # <media type="flashVideo"> holds the direct stream url.
            'url': find_xpath_attr(video_node, 'media', 'type', 'flashVideo').text,
            'description': compat_str(video_node.find('caption').text),
            'thumbnail': find_xpath_attr(video_node, 'media', 'type', 'thumbnail').text,
        }
        return result

+ 54 - 0
youtube_dl/extractor/orf.py

@@ -0,0 +1,54 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    ExtractorError,
+    find_xpath_attr,
+)
+
class ORFIE(InfoExtractor):
    """Extractor for episode and topic pages on the ORF TVthek."""
    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return one video entry per item of the page's playlist."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, playlist_id)

        # The flash configuration is embedded URL-encoded in a JS string.
        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
        # Titles/ids come from a JSON playlist with escaped double quotes.
        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
        playlist = json.loads(playlist_json)

        ns = '{http://tempuri.org/XMLSchema.xsd}'
        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
        fallback_description = self._og_search_description(webpage)

        entries = []
        for (idx, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
            # Walk the qualities best-first; the first present url wins.
            rtmp_url = None
            for quality in ['Q6A', 'Q4A', 'Q1A']:
                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', quality)
                if video_url is not None:
                    rtmp_url = video_url.text
                    break
            if rtmp_url is None:
                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
            description = self._html_search_regex(
                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % idx, webpage,
                u'description', default=fallback_description, flags=re.DOTALL)
            entries.append({
                '_type': 'video',
                'id': info['id'],
                'title': info['title'],
                'url': rtmp_url,
                'ext': 'flv',
                'description': description,
                })

        return entries

+ 42 - 0
youtube_dl/extractor/ro220.py

@@ -0,0 +1,42 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    compat_parse_qs,
+)
+
+
class Ro220IE(InfoExtractor):
    """Extractor for video pages on 220.ro."""
    IE_NAME = '220.ro'
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }

    def _real_extract(self, url):
        """All metadata lives in the player's flashVars query string."""
        video_id = re.match(self._VALID_URL, url).group('video_id')

        webpage = self._download_webpage(url, video_id)
        flashvars = compat_parse_qs(self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            webpage, u'flashVars'))

        # compat_parse_qs yields lists; each field is single-valued here.
        return {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': flashvars['videoURL'][0],
            'title': flashvars['title'][0],
            'description': clean_html(flashvars['desc'][0]),
            'thumbnail': flashvars['preview'][0],
        }

+ 15 - 2
youtube_dl/extractor/rtlnow.py

@@ -8,8 +8,8 @@ from ..utils import (
 )
 )
 
 
 class RTLnowIE(InfoExtractor):
 class RTLnowIE(InfoExtractor):
-    """Information Extractor for RTLnow, RTL2now and VOXnow"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
     _TESTS = [{
     _TESTS = [{
         u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
         u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
         u'file': u'90419.flv',
         u'file': u'90419.flv',
@@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
         u'params': {
         u'params': {
             u'skip_download': True,
             u'skip_download': True,
         },
         },
+    },
+    {
+        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
+        u'file': u'99205.flv',
+        u'info_dict': {
+            u'upload_date': u'20080928', 
+            u'title': u'Medicopter 117 - Angst!',
+            u'description': u'Angst!',
+            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
+        },
+        u'params': {
+            u'skip_download': True,
+        },
     }]
     }]
 
 
     def _real_extract(self,url):
     def _real_extract(self,url):

+ 90 - 0
youtube_dl/extractor/sohu.py

@@ -0,0 +1,90 @@
+# encoding: utf-8
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
class SohuIE(InfoExtractor):
    """Extractor for tv.sohu.com; multi-part videos become a playlist."""
    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'

    _TEST = {
        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
        u'file': u'382479172.mp4',
        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
        u'info_dict': {
            u'title': u'MV:Far East Movement《The Illest》',
        },
    }

    def _real_extract(self, url):
        """Resolve the page vid to per-part stream urls via the vrs API."""

        video_id = re.match(self._VALID_URL, url).group('id')

        def _fetch_data(vid_id):
            # Per-quality metadata is served as JSON by the vrs endpoint.
            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
            data_json = self._download_webpage(
                base_data_url + str(vid_id), video_id,
                note=u'Downloading JSON data for ' + str(vid_id))
            return json.loads(data_json)

        webpage = self._download_webpage(url, video_id)
        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
                                            webpage, u'video title')
        # Page titles look like "<video title>-<site suffix>".
        title = raw_title.partition('-')[0].strip()

        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
                                      u'video path')
        data = _fetch_data(vid)

        QUALITIES = ('ori', 'super', 'high', 'nor')
        available_vids = [data['data'][q + 'Vid']
                          for q in QUALITIES
                          if data['data'][q + 'Vid'] != 0]
        if not available_vids:
            raise ExtractorError(u'No formats available for this video')

        # For now, we just pick the highest available quality
        vid_id = available_vids[-1]

        # Re-fetch only when the chosen quality differs from the page vid.
        format_data = data if vid == vid_id else _fetch_data(vid_id)
        part_count = format_data['data']['totalBlocks']
        allot = format_data['allot']
        prot = format_data['prot']
        clipsURL = format_data['data']['clipsURL']
        su = format_data['data']['su']

        entries = []
        for idx in range(part_count):
            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
                        (allot, prot, clipsURL[idx], su[idx]))
            part_str = self._download_webpage(
                part_url, video_id,
                note=u'Downloading part %d of %d' % (idx + 1, part_count))

            # The response is a '|'-separated record; field 0 is the host
            # prefix and field 3 the access key.
            fields = part_str.split('|')
            part_video_url = '%s%s?key=%s' % (fields[0], su[idx], fields[3])

            entries.append({
                'id': '%s_part%02d' % (video_id, idx + 1),
                'title': title,
                'url': part_video_url,
                'ext': 'mp4',
            })

        if len(entries) == 1:
            # Single-part video: flatten to a plain video entry.
            single = entries[0]
            single['id'] = video_id
            return single

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': video_id,
        }

+ 73 - 0
youtube_dl/extractor/trilulilu.py

@@ -0,0 +1,73 @@
+import json
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+
class TriluliluIE(InfoExtractor):
    """Extractor for video pages on trilulilu.ro."""
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
        u'file': u"big-buck-bunny-1.mp4",
        u'info_dict': {
            u"title": u"Big Buck Bunny",
            u"description": u":) pentru copilul din noi",
        },
        # Server ignores Range headers (--test)
        u"params": {
            u"skip_download": True
        }
    }

    def _real_extract(self, url):
        """List the available formats from the site's XML endpoint."""
        video_id = re.match(self._VALID_URL, url).group('video_id')

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        # The player config embeds the server number, hash and user id
        # needed to build the format-listing and stream urls.
        log = json.loads(self._search_regex(
            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info'))

        format_listing_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
                              u'video-formats2' % log)
        format_str = self._download_webpage(
            format_listing_url, video_id,
            note=u'Downloading formats',
            errnote=u'Error while downloading formats')
        format_doc = xml.etree.ElementTree.fromstring(format_str)

        video_url_template = (
            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
            u'&source=site&hash=%(hash)s&username=%(userid)s&'
            u'key=ministhebest&format=%%s&sig=&exp=' %
            log)
        formats = []
        for fnode in format_doc.findall('./formats/format'):
            formats.append({
                'format': fnode.text,
                'url': video_url_template % fnode.text,
            })

        info = {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = formats[-1]['format'].partition('-')[0]

        return info

+ 1 - 1
youtube_dl/extractor/unistra.py

@@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):
         u'md5': u'736f605cfdc96724d55bb543ab3ced24',
         u'md5': u'736f605cfdc96724d55bb543ab3ced24',
         u'info_dict': {
         u'info_dict': {
             u'title': u'M!ss Yella',
             u'title': u'M!ss Yella',
-            u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
+            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
         },
         },
     }
     }
 
 

+ 56 - 0
youtube_dl/extractor/veehd.py

@@ -0,0 +1,56 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    get_element_by_id,
+    clean_html,
+)
+
class VeeHDIE(InfoExtractor):
    """Extractor for video pages on veehd.com."""
    _VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
        u'file': u'4686958.mp4',
        u'info_dict': {
            u'title': u'Time Lapse View from Space ( ISS)',
            u'uploader_id': u'spotted',
            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
        },
    }

    def _real_extract(self, url):
        """Follow the player iframe to its JSON config and read the clip url."""
        # FIX: compat_urlparse maps to the `urlparse` module on Python 2,
        # which has no unquote() (AttributeError at runtime); unquote lives
        # in compat_urllib_parse (urllib on 2, urllib.parse on 3).
        from ..utils import compat_urllib_parse

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The real player is loaded into an iframe whose src is set via jQuery.
        player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
            webpage, u'player path')
        player_url = compat_urlparse.urljoin(url, player_path)
        player_page = self._download_webpage(player_url, video_id,
            u'Downloading player page')
        config_json = self._search_regex(r'value=\'config=({.+?})\'',
            player_page, u'config json')
        config = json.loads(config_json)

        video_url = compat_urllib_parse.unquote(config['clip']['url'])
        # Page title looks like "<name> | VeeHD"; keep only the name part.
        title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
            webpage, u'uploader')
        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
            webpage, u'thumbnail')
        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, u'description', flags=re.DOTALL)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }

+ 37 - 11
youtube_dl/extractor/vimeo.py

@@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
                 u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
                 u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
             },
             },
         },
         },
+        {
+            u'url': u'http://player.vimeo.com/video/54469442',
+            u'file': u'54469442.mp4',
+            u'md5': u'619b811a4417aa4abe78dc653becf511',
+            u'note': u'Videos that embed the url in the player page',
+            u'info_dict': {
+                u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
+                u'uploader': u'The BLN & Business of Software',
+            },
+        },
     ]
     ]
 
 
     def _login(self):
     def _login(self):
@@ -112,7 +122,8 @@ class VimeoIE(InfoExtractor):
 
 
         # Extract the config JSON
         # Extract the config JSON
         try:
         try:
-            config = webpage.split(' = {config:')[1].split(',assets:')[0]
+            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
+                webpage, u'info section', flags=re.DOTALL)
             config = json.loads(config)
             config = json.loads(config)
         except:
         except:
             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@@ -132,12 +143,22 @@ class VimeoIE(InfoExtractor):
         video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
         video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
 
 
         # Extract video thumbnail
         # Extract video thumbnail
-        video_thumbnail = config["video"]["thumbnail"]
+        video_thumbnail = config["video"].get("thumbnail")
+        if video_thumbnail is None:
+            _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
 
 
         # Extract video description
         # Extract video description
-        video_description = get_element_by_attribute("itemprop", "description", webpage)
-        if video_description: video_description = clean_html(video_description)
-        else: video_description = u''
+        video_description = None
+        try:
+            video_description = get_element_by_attribute("itemprop", "description", webpage)
+            if video_description: video_description = clean_html(video_description)
+        except AssertionError as err:
+            # On some pages like (http://player.vimeo.com/video/54469442) the
+            # html tags are not closed, python 2.6 cannot handle it
+            if err.args[0] == 'we should not get here!':
+                pass
+            else:
+                raise
 
 
         # Extract upload date
         # Extract upload date
         video_upload_date = None
         video_upload_date = None
@@ -154,14 +175,15 @@ class VimeoIE(InfoExtractor):
         # TODO bind to format param
         # TODO bind to format param
         codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
         codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
         files = { 'hd': [], 'sd': [], 'other': []}
         files = { 'hd': [], 'sd': [], 'other': []}
+        config_files = config["video"].get("files") or config["request"].get("files")
         for codec_name, codec_extension in codecs:
         for codec_name, codec_extension in codecs:
-            if codec_name in config["video"]["files"]:
-                if 'hd' in config["video"]["files"][codec_name]:
+            if codec_name in config_files:
+                if 'hd' in config_files[codec_name]:
                     files['hd'].append((codec_name, codec_extension, 'hd'))
                     files['hd'].append((codec_name, codec_extension, 'hd'))
-                elif 'sd' in config["video"]["files"][codec_name]:
+                elif 'sd' in config_files[codec_name]:
                     files['sd'].append((codec_name, codec_extension, 'sd'))
                     files['sd'].append((codec_name, codec_extension, 'sd'))
                 else:
                 else:
-                    files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+                    files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
 
 
         for quality in ('hd', 'sd', 'other'):
         for quality in ('hd', 'sd', 'other'):
             if len(files[quality]) > 0:
             if len(files[quality]) > 0:
@@ -173,8 +195,12 @@ class VimeoIE(InfoExtractor):
         else:
         else:
             raise ExtractorError(u'No known codec found')
             raise ExtractorError(u'No known codec found')
 
 
-        video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
-                    %(video_id, sig, timestamp, video_quality, video_codec.upper())
+        video_url = None
+        if isinstance(config_files[video_codec], dict):
+            video_url = config_files[video_codec][video_quality].get("url")
+        if video_url is None:
+            video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
+                        %(video_id, sig, timestamp, video_quality, video_codec.upper())
 
 
         return [{
         return [{
             'id':       video_id,
             'id':       video_id,

+ 0 - 1
youtube_dl/extractor/wat.py

@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from .common import InfoExtractor
 
 
 from ..utils import (
 from ..utils import (
-    compat_urllib_parse,
     unified_strdate,
     unified_strdate,
 )
 )
 
 

+ 10 - 8
youtube_dl/extractor/xhamster.py

@@ -3,7 +3,8 @@ import re
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     compat_urllib_parse,
     compat_urllib_parse,
-
+    unescapeHTML,
+    determine_ext,
     ExtractorError,
     ExtractorError,
 )
 )
 
 
@@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
             video_url = compat_urllib_parse.unquote(mobj.group('file'))
             video_url = compat_urllib_parse.unquote(mobj.group('file'))
         else:
         else:
             video_url = mobj.group('server')+'/key='+mobj.group('file')
             video_url = mobj.group('server')+'/key='+mobj.group('file')
-        video_extension = video_url.split('.')[-1]
 
 
         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
             webpage, u'title')
             webpage, u'title')
 
 
-        # Can't see the description anywhere in the UI
-        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
-        #     webpage, u'description', fatal=False)
-        # if video_description: video_description = unescapeHTML(video_description)
+        # Only a few videos have an description
+        mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
+        if mobj:
+            video_description = unescapeHTML(mobj.group('description'))
+        else:
+            video_description = None
 
 
         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
         if mobj:
         if mobj:
@@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
         return [{
         return [{
             'id':       video_id,
             'id':       video_id,
             'url':      video_url,
             'url':      video_url,
-            'ext':      video_extension,
+            'ext':      determine_ext(video_url),
             'title':    video_title,
             'title':    video_title,
-            # 'description': video_description,
+            'description': video_description,
             'upload_date': video_upload_date,
             'upload_date': video_upload_date,
             'uploader_id': video_uploader_id,
             'uploader_id': video_uploader_id,
             'thumbnail': video_thumbnail
             'thumbnail': video_thumbnail

+ 14 - 4
youtube_dl/extractor/youporn.py

@@ -12,14 +12,16 @@ from ..utils import (
     unescapeHTML,
     unescapeHTML,
     unified_strdate,
     unified_strdate,
 )
 )
-
+from ..aes import (
+    aes_decrypt_text
+)
 
 
 class YouPornIE(InfoExtractor):
 class YouPornIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
     _TEST = {
     _TEST = {
         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         u'file': u'505835.mp4',
         u'file': u'505835.mp4',
-        u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
+        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
         u'info_dict': {
         u'info_dict': {
             u"upload_date": u"20101221", 
             u"upload_date": u"20101221", 
             u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
             u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
@@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
         # Get all of the links from the page
         # Get all of the links from the page
         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
         links = re.findall(LINK_RE, download_list_html)
         links = re.findall(LINK_RE, download_list_html)
-        if(len(links) == 0):
+        
+        # Get link of hd video if available
+        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
+        if mobj != None:
+            encrypted_video_url = mobj.group(u'encrypted_video_url')
+            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
+            links = [video_url] + links
+        
+        if not links:
             raise ExtractorError(u'ERROR: no known formats available for video')
             raise ExtractorError(u'ERROR: no known formats available for video')
 
 
         self.to_screen(u'Links found: %d' % len(links))
         self.to_screen(u'Links found: %d' % len(links))
@@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
             self._print_formats(formats)
             self._print_formats(formats)
             return
             return
 
 
-        req_format = self._downloader.params.get('format', None)
+        req_format = self._downloader.params.get('format', 'best')
         self.to_screen(u'Format: %s' % req_format)
         self.to_screen(u'Format: %s' % req_format)
 
 
         if req_format is None or req_format == 'best':
         if req_format is None or req_format == 'best':

+ 63 - 26
youtube_dl/extractor/youtube.py

@@ -194,7 +194,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
     _VALID_URL = r"""^
     _VALID_URL = r"""^
                      (
                      (
                          (?:https?://)?                                       # http(s):// (optional)
                          (?:https?://)?                                       # http(s):// (optional)
-                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                             tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                             tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
                          (?:                                                  # the various things that can precede the ID:
@@ -205,15 +205,18 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  v=
                                  v=
                              )
                              )
-                         )?                                                   # optional -> youtube.com/xxxx is OK
+                         ))
+                         |youtu\.be/                                          # just youtu.be/xxxx
+                         )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                      ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                      (?(1).+)?                                                # if we found the ID, everything can follow
                      (?(1).+)?                                                # if we found the ID, everything can follow
                      $"""
                      $"""
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
     # Listed in order of quality
     # Listed in order of quality
-    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
-                          '95', '94', '93', '92', '132', '151',
+    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
+                          # Apple HTTP Live Streaming
+                          '96', '95', '94', '93', '92', '132', '151',
                           # 3D
                           # 3D
                           '85', '84', '102', '83', '101', '82', '100',
                           '85', '84', '102', '83', '101', '82', '100',
                           # Dash video
                           # Dash video
@@ -222,8 +225,10 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
                           # Dash audio
                           # Dash audio
                           '141', '172', '140', '171', '139',
                           '141', '172', '140', '171', '139',
                           ]
                           ]
-    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
-                                      '95', '94', '93', '92', '132', '151',
+    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
+                                      # Apple HTTP Live Streaming
+                                      '96', '95', '94', '93', '92', '132', '151',
+                                      # 3D
                                       '85', '102', '84', '101', '83', '100', '82',
                                       '85', '102', '84', '101', '83', '100', '82',
                                       # Dash video
                                       # Dash video
                                       '138', '248', '137', '247', '136', '246', '245',
                                       '138', '248', '137', '247', '136', '246', '245',
@@ -231,11 +236,18 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
                                       # Dash audio
                                       # Dash audio
                                       '172', '141', '171', '140', '139',
                                       '172', '141', '171', '140', '139',
                                       ]
                                       ]
+    _video_formats_map = {
+        'flv': ['35', '34', '6', '5'],
+        '3gp': ['36', '17', '13'],
+        'mp4': ['38', '37', '22', '18'],
+        'webm': ['46', '45', '44', '43'],
+    }
     _video_extensions = {
     _video_extensions = {
         '13': '3gp',
         '13': '3gp',
-        '17': 'mp4',
+        '17': '3gp',
         '18': 'mp4',
         '18': 'mp4',
         '22': 'mp4',
         '22': 'mp4',
+        '36': '3gp',
         '37': 'mp4',
         '37': 'mp4',
         '38': 'mp4',
         '38': 'mp4',
         '43': 'webm',
         '43': 'webm',
@@ -252,7 +264,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
         '101': 'webm',
         '101': 'webm',
         '102': 'webm',
         '102': 'webm',
 
 
-        # videos that use m3u8
+        # Apple HTTP Live Streaming
         '92': 'mp4',
         '92': 'mp4',
         '93': 'mp4',
         '93': 'mp4',
         '94': 'mp4',
         '94': 'mp4',
@@ -293,6 +305,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
         '22': '720x1280',
         '22': '720x1280',
         '34': '360x640',
         '34': '360x640',
         '35': '480x854',
         '35': '480x854',
+        '36': '240x320',
         '37': '1080x1920',
         '37': '1080x1920',
         '38': '3072x4096',
         '38': '3072x4096',
         '43': '360x640',
         '43': '360x640',
@@ -394,7 +407,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
             u"info_dict": {
             u"info_dict": {
                 u"upload_date": u"20120506",
                 u"upload_date": u"20120506",
                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
+                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                 u"uploader": u"Icona Pop",
                 u"uploader": u"Icona Pop",
                 u"uploader_id": u"IconaPop"
                 u"uploader_id": u"IconaPop"
             }
             }
@@ -432,7 +445,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
     @classmethod
     @classmethod
     def suitable(cls, url):
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
         """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
+        if YoutubePlaylistIE.suitable(url): return False
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
 
     def report_video_webpage_download(self, video_id):
     def report_video_webpage_download(self, video_id):
@@ -465,15 +478,15 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
         elif len(s) == 89:
         elif len(s) == 89:
             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
         elif len(s) == 88:
         elif len(s) == 88:
-            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
         elif len(s) == 87:
         elif len(s) == 87:
             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
         elif len(s) == 86:
         elif len(s) == 86:
-            return s[5:20] + s[2] + s[21:]
+            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
         elif len(s) == 85:
         elif len(s) == 85:
             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
         elif len(s) == 84:
         elif len(s) == 84:
-            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
+            return s[81:36:-1] + s[0] + s[35:2:-1]
         elif len(s) == 83:
         elif len(s) == 83:
             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
         elif len(s) == 82:
         elif len(s) == 82:
@@ -537,13 +550,25 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
         else:
         else:
             # Specific formats. We pick the first in a slash-delimeted sequence.
             # Specific formats. We pick the first in a slash-delimeted sequence.
-            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
+            # available in the specified format. For example,
+            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
+            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
             req_formats = req_format.split('/')
             req_formats = req_format.split('/')
             video_url_list = None
             video_url_list = None
             for rf in req_formats:
             for rf in req_formats:
                 if rf in url_map:
                 if rf in url_map:
                     video_url_list = [(rf, url_map[rf])]
                     video_url_list = [(rf, url_map[rf])]
                     break
                     break
+                if rf in self._video_formats_map:
+                    for srf in self._video_formats_map[rf]:
+                        if srf in url_map:
+                            video_url_list = [(srf, url_map[srf])]
+                            break
+                    else:
+                        continue
+                    break
             if video_url_list is None:
             if video_url_list is None:
                 raise ExtractorError(u'requested format not available')
                 raise ExtractorError(u'requested format not available')
         return video_url_list
         return video_url_list
@@ -558,7 +583,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
         formats_urls = _get_urls(manifest)
         formats_urls = _get_urls(manifest)
         for format_url in formats_urls:
         for format_url in formats_urls:
-            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
+            itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
             url_map[itag] = format_url
             url_map[itag] = format_url
         return url_map
         return url_map
 
 
@@ -860,8 +885,11 @@ class YoutubePlaylistIE(InfoExtractor):
 
 
             for entry in response['feed']['entry']:
             for entry in response['feed']['entry']:
                 index = entry['yt$position']['$t']
                 index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'media$player' in entry['media$group']:
-                    videos.append((index, entry['media$group']['media$player']['url']))
+                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
+                    videos.append((
+                        index,
+                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
+                    ))
 
 
         videos = [v[1] for v in sorted(videos)]
         videos = [v[1] for v in sorted(videos)]
 
 
@@ -927,13 +955,20 @@ class YoutubeChannelIE(InfoExtractor):
 
 
 class YoutubeUserIE(InfoExtractor):
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_PAGE_SIZE = 50
-    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
-    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
+    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     IE_NAME = u'youtube:user'
     IE_NAME = u'youtube:user'
 
 
+    @classmethod
+    def suitable(cls, url):
+        # Don't return True if the url can be extracted with other youtube
+        # extractor, the regex would is too permissive and it would match.
+        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
+        if any(ie.suitable(url) for ie in other_ies): return False
+        else: return super(YoutubeUserIE, cls).suitable(url)
+
     def _real_extract(self, url):
     def _real_extract(self, url):
         # Extract username
         # Extract username
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
@@ -956,13 +991,15 @@ class YoutubeUserIE(InfoExtractor):
             page = self._download_webpage(gdata_url, username,
             page = self._download_webpage(gdata_url, username,
                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
 
 
+            try:
+                response = json.loads(page)
+            except ValueError as err:
+                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+
             # Extract video identifiers
             # Extract video identifiers
             ids_in_page = []
             ids_in_page = []
-
-            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
-                if mobj.group(1) not in ids_in_page:
-                    ids_in_page.append(mobj.group(1))
-
+            for entry in response['feed']['entry']:
+                ids_in_page.append(entry['id']['$t'].split('/')[-1])
             video_ids.extend(ids_in_page)
             video_ids.extend(ids_in_page)
 
 
             # A little optimization - if current page is not
             # A little optimization - if current page is not
@@ -1101,7 +1138,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
     IE_NAME = u'youtube:favorites'
     IE_NAME = u'youtube:favorites'
     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
+    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
     _LOGIN_REQUIRED = True
     _LOGIN_REQUIRED = True
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):

+ 57 - 7
youtube_dl/utils.py

@@ -1,19 +1,20 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 
 
+import datetime
+import email.utils
 import errno
 import errno
 import gzip
 import gzip
 import io
 import io
 import json
 import json
 import locale
 import locale
 import os
 import os
+import platform
 import re
 import re
+import socket
 import sys
 import sys
 import traceback
 import traceback
 import zlib
 import zlib
-import email.utils
-import socket
-import datetime
 
 
 try:
 try:
     import urllib.request as compat_urllib_request
     import urllib.request as compat_urllib_request
@@ -60,6 +61,11 @@ try:
 except ImportError: # Python 2
 except ImportError: # Python 2
     import httplib as compat_http_client
     import httplib as compat_http_client
 
 
+try:
+    from urllib.error import HTTPError as compat_HTTPError
+except ImportError:  # Python 2
+    from urllib2 import HTTPError as compat_HTTPError
+
 try:
 try:
     from subprocess import DEVNULL
     from subprocess import DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@ -207,7 +213,7 @@ if sys.version_info >= (2,7):
     def find_xpath_attr(node, xpath, key, val):
     def find_xpath_attr(node, xpath, key, val):
         """ Find the xpath xpath[@key=val] """
         """ Find the xpath xpath[@key=val] """
         assert re.match(r'^[a-zA-Z]+$', key)
         assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z@\s]*$', val)
+        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
         expr = xpath + u"[@%s='%s']" % (key, val)
         expr = xpath + u"[@%s='%s']" % (key, val)
         return node.find(expr)
         return node.find(expr)
 else:
 else:
@@ -489,7 +495,7 @@ def make_HTTPS_handler(opts):
 
 
 class ExtractorError(Exception):
 class ExtractorError(Exception):
     """Error during info extraction."""
     """Error during info extraction."""
-    def __init__(self, msg, tb=None, expected=False):
+    def __init__(self, msg, tb=None, expected=False, cause=None):
         """ tb, if given, is the original traceback (so that it can be printed out).
         """ tb, if given, is the original traceback (so that it can be printed out).
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
         """
         """
@@ -502,6 +508,7 @@ class ExtractorError(Exception):
 
 
         self.traceback = tb
         self.traceback = tb
         self.exc_info = sys.exc_info()  # preserve original exception
         self.exc_info = sys.exc_info()  # preserve original exception
+        self.cause = cause
 
 
     def format_traceback(self):
     def format_traceback(self):
         if self.traceback is None:
         if self.traceback is None:
@@ -622,8 +629,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         old_resp = resp
         old_resp = resp
         # gzip
         # gzip
         if resp.headers.get('Content-encoding', '') == 'gzip':
         if resp.headers.get('Content-encoding', '') == 'gzip':
-            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
-            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+            content = resp.read()
+            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+            try:
+                uncompressed = io.BytesIO(gz.read())
+            except IOError as original_ioerror:
+                # There may be junk add the end of the file
+                # See http://stackoverflow.com/q/4928560/35070 for details
+                for i in range(1, 1024):
+                    try:
+                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+                        uncompressed = io.BytesIO(gz.read())
+                    except IOError:
+                        continue
+                    break
+                else:
+                    raise original_ioerror
+            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
             resp.msg = old_resp.msg
         # deflate
         # deflate
         if resp.headers.get('Content-encoding', '') == 'deflate':
         if resp.headers.get('Content-encoding', '') == 'deflate':
@@ -711,3 +733,31 @@ class DateRange(object):
         return self.start <= date <= self.end
         return self.start <= date <= self.end
     def __str__(self):
     def __str__(self):
         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+    """ Returns the platform name as a compat_str """
+    res = platform.platform()
+    if isinstance(res, bytes):
+        res = res.decode(preferredencoding())
+
+    assert isinstance(res, compat_str)
+    return res
+
+
+def bytes_to_intlist(bs):
+    if not bs:
+        return []
+    if isinstance(bs[0], int):  # Python 3
+        return list(bs)
+    else:
+        return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+    if not xs:
+        return b''
+    if isinstance(chr(0), bytes):  # Python 2
+        return ''.join([chr(x) for x in xs])
+    else:
+        return bytes(xs)

+ 1 - 1
youtube_dl/version.py

@@ -1,2 +1,2 @@
 
 
-__version__ = '2013.08.22'
+__version__ = '2013.09.06.1'