浏览代码

Merge remote-tracking branch 'origin/master'

Philipp Hagemeister 12 年之前
父节点
当前提交
ebc14f251c

+ 39 - 0
devscripts/check-porn.py

@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
+if we are not 'age_limit' tagging some porn site
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+from youtube_dl.utils import compat_urllib_request
+
+for test in get_testcases():
+    try:
+        webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+    except:
+        print('\nFail: {0}'.format(test['name']))
+        continue
+
+    webpage = webpage.decode('utf8', 'replace')
+
+    if 'porn' in webpage.lower() and ('info_dict' not in test
+                                      or 'age_limit' not in test['info_dict']
+                                      or test['info_dict']['age_limit'] != 18):
+        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+    elif 'porn' not in webpage.lower() and ('info_dict' in test and
+                                            'age_limit' in test['info_dict'] and
+                                            test['info_dict']['age_limit'] == 18):
+        print('\nPotential false negative: {0}'.format(test['name']))
+
+    else:
+        sys.stdout.write('.')
+    sys.stdout.flush()
+
+print()

+ 5 - 1
setup.py

@@ -8,6 +8,7 @@ import sys
 
 
 try:
 try:
     from setuptools import setup
     from setuptools import setup
+    setuptools_available = True
 except ImportError:
 except ImportError:
     from distutils.core import setup
     from distutils.core import setup
 
 
@@ -43,13 +44,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
     params = py2exe_params
     params = py2exe_params
 else:
 else:
     params = {
     params = {
-        'scripts': ['bin/youtube-dl'],
         'data_files': [  # Installing system-wide would require sudo...
         'data_files': [  # Installing system-wide would require sudo...
             ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
             ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
             ('share/doc/youtube_dl', ['README.txt']),
             ('share/doc/youtube_dl', ['README.txt']),
             ('share/man/man1/', ['youtube-dl.1'])
             ('share/man/man1/', ['youtube-dl.1'])
         ]
         ]
     }
     }
+    if setuptools_available:
+        params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
+    else:
+        params['scripts'] = ['bin/youtube-dl']
 
 
 # Get the version from youtube_dl/version.py without importing the package
 # Get the version from youtube_dl/version.py without importing the package
 exec(compile(open('youtube_dl/version.py').read(),
 exec(compile(open('youtube_dl/version.py').read(),

+ 1 - 1
youtube_dl/YoutubeDL.py

@@ -462,7 +462,7 @@ class YoutubeDL(object):
             info_dict['playlist_index'] = None
             info_dict['playlist_index'] = None
 
 
         # This extractors handle format selection themselves
         # This extractors handle format selection themselves
-        if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
+        if info_dict['extractor'] in [u'youtube', u'Youku', u'mixcloud']:
             if download:
             if download:
                 self.process_info(info_dict)
                 self.process_info(info_dict)
             return info_dict
             return info_dict

+ 4 - 0
youtube_dl/extractor/__init__.py

@@ -72,6 +72,7 @@ from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .justintv import JustinTVIE
 from .kankan import KankanIE
 from .kankan import KankanIE
+from .keezmovies import KeezMoviesIE
 from .kickstarter import KickStarterIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .liveleak import LiveLeakIE
@@ -94,6 +95,7 @@ from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .orf import ORFIE
 from .pbs import PBSIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .photobucket import PhotobucketIE
+from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
 from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .redtube import RedTubeIE
@@ -109,6 +111,7 @@ from .slideshare import SlideshareIE
 from .sohu import SohuIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .southparkstudios import SouthParkStudiosIE
 from .southparkstudios import SouthParkStudiosIE
+from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
 from .statigram import StatigramIE
@@ -121,6 +124,7 @@ from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .trilulilu import TriluliluIE
+from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .tutv import TutvIE

+ 6 - 4
youtube_dl/extractor/addanime.py

@@ -17,8 +17,8 @@ class AddAnimeIE(InfoExtractor):
     IE_NAME = u'AddAnime'
     IE_NAME = u'AddAnime'
     _TEST = {
     _TEST = {
         u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
-        u'file': u'24MR3YO5SAS9.flv',
-        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
+        u'file': u'24MR3YO5SAS9.mp4',
+        u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
         u'info_dict': {
         u'info_dict': {
             u"description": u"One Piece 606",
             u"description": u"One Piece 606",
             u"title": u"One Piece 606"
             u"title": u"One Piece 606"
@@ -60,8 +60,10 @@ class AddAnimeIE(InfoExtractor):
                 note=u'Confirming after redirect')
                 note=u'Confirming after redirect')
             webpage = self._download_webpage(url, video_id)
             webpage = self._download_webpage(url, video_id)
 
 
-        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
+        video_url = self._search_regex(r"var (?:hq|normal)_video_file = '(.*?)';",
                                        webpage, u'video file URL')
                                        webpage, u'video file URL')
+        
+        video_extension = video_url[-3:]  # mp4 or flv ?
         video_title = self._og_search_title(webpage)
         video_title = self._og_search_title(webpage)
         video_description = self._og_search_description(webpage)
         video_description = self._og_search_description(webpage)
 
 
@@ -69,7 +71,7 @@ class AddAnimeIE(InfoExtractor):
             '_type': 'video',
             '_type': 'video',
             'id':  video_id,
             'id':  video_id,
             'url': video_url,
             'url': video_url,
-            'ext': 'flv',
+            'ext': video_extension,
             'title': video_title,
             'title': video_title,
             'description': video_description
             'description': video_description
         }
         }

+ 4 - 4
youtube_dl/extractor/common.py

@@ -330,10 +330,10 @@ class InfoExtractor(object):
     def _og_search_title(self, html, **kargs):
     def _og_search_title(self, html, **kargs):
         return self._og_search_property('title', html, **kargs)
         return self._og_search_property('title', html, **kargs)
 
 
-    def _og_search_video_url(self, html, name='video url', **kargs):
-        return self._html_search_regex([self._og_regex('video:secure_url'),
-                                        self._og_regex('video')],
-                                       html, name, **kargs)
+    def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
+        regexes = [self._og_regex('video')]
+        if secure: regexes.insert(0, self._og_regex('video:secure_url'))
+        return self._html_search_regex(regexes, html, name, **kargs)
 
 
     def _rta_search(self, html):
     def _rta_search(self, html):
         # See http://www.rtalabel.org/index.php?content=howtofaq#single
         # See http://www.rtalabel.org/index.php?content=howtofaq#single

+ 7 - 7
youtube_dl/extractor/exfm.py

@@ -11,14 +11,14 @@ class ExfmIE(InfoExtractor):
     _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
     _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
     _TESTS = [
     _TESTS = [
         {
         {
-            u'url': u'http://ex.fm/song/1bgtzg',
-            u'file': u'95223130.mp3',
-            u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
+            u'url': u'http://ex.fm/song/eh359',
+            u'file': u'44216187.mp3',
+            u'md5': u'e45513df5631e6d760970b14cc0c11e7',
             u'info_dict': {
             u'info_dict': {
-                u"title": u"We Can't Stop - Miley Cyrus",
-                u"uploader": u"Miley Cyrus",
-                u'upload_date': u'20130603',
-                u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
+                u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive",
+                u"uploader": u"deadjournalist",
+                u'upload_date': u'20120424',
+                u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
             },
             },
             u'note': u'Soundcloud song',
             u'note': u'Soundcloud song',
         },
         },

+ 1 - 1
youtube_dl/extractor/instagram.py

@@ -26,7 +26,7 @@ class InstagramIE(InfoExtractor):
 
 
         return [{
         return [{
             'id':        video_id,
             'id':        video_id,
-            'url':       self._og_search_video_url(webpage),
+            'url':       self._og_search_video_url(webpage, secure=False),
             'ext':       'mp4',
             'ext':       'mp4',
             'title':     u'Video by %s' % uploader_id,
             'title':     u'Video by %s' % uploader_id,
             'thumbnail': self._og_search_thumbnail(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),

+ 61 - 0
youtube_dl/extractor/keezmovies.py

@@ -0,0 +1,61 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class KeezMoviesIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))'
+    _TEST = {
+        u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
+        u'file': u'1214711.mp4',
+        u'md5': u'6e297b7e789329923fcf83abb67c9289',
+        u'info_dict': {
+            u"title": u"Petite Asian Lady Mai Playing In Bathtub",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        # embedded video
+        mobj = re.search(r'href="([^"]+)"></iframe>', webpage)
+        if mobj:
+            embedded_url = mobj.group(1)
+            return self.url_result(embedded_url)
+
+        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title')
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
+        if webpage.find('encrypted=true')!=-1:
+            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
+            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        path = compat_urllib_parse_urlparse( video_url ).path
+        extension = os.path.splitext( path )[1][1:]
+        format = path.split('/')[4].split('_')[:2]
+        format = "-".join( format )
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': age_limit,
+        }

+ 69 - 0
youtube_dl/extractor/pornhub.py

@@ -0,0 +1,69 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+    unescapeHTML,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class PornHubIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
+    _TEST = {
+        u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
+        u'file': u'648719015.mp4',
+        u'md5': u'882f488fa1f0026f023f33576004a2ed',
+        u'info_dict': {
+            u"uploader": u"BABES-COM", 
+            u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
+            u"age_limit": 18
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, u'title')
+        video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = compat_urllib_parse.unquote(thumbnail)
+
+        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
+        if webpage.find('"encrypted":true') != -1:
+            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password').replace('+', ' ')
+            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
+
+        formats = []
+        for video_url in video_urls:
+            path = compat_urllib_parse_urlparse( video_url ).path
+            extension = os.path.splitext( path )[1][1:]
+            format = path.split('/')[5].split('_')[:2]
+            format = "-".join( format )
+            formats.append({
+                'url': video_url,
+                'ext': extension,
+                'format': format,
+                'format_id': format,
+            })
+        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+            'age_limit': 18,
+        }

+ 2 - 1
youtube_dl/extractor/pornotube.py

@@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
         u'md5': u'374dd6dcedd24234453b295209aa69b6',
         u'md5': u'374dd6dcedd24234453b295209aa69b6',
         u'info_dict': {
         u'info_dict': {
             u"upload_date": u"20090708", 
             u"upload_date": u"20090708", 
-            u"title": u"Marilyn-Monroe-Bathing"
+            u"title": u"Marilyn-Monroe-Bathing",
+            u"age_limit": 18
         }
         }
     }
     }
 
 

+ 74 - 0
youtube_dl/extractor/spankwire.py

@@ -0,0 +1,74 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+    unescapeHTML,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class SpankwireIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _TEST = {
+        u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
+        u'file': u'103545.mp4',
+        u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
+        u'info_dict': {
+            u"uploader": u"oreusz", 
+            u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
+            u"description": u"Crazy Bitch X rated music video.",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
+        video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
+        description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False)
+        if len(description) == 0:
+            description = None
+
+        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
+        if webpage.find('flashvars\.encrypted = "true"') != -1:
+            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
+            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
+
+        formats = []
+        for video_url in video_urls:
+            path = compat_urllib_parse_urlparse( video_url ).path
+            extension = os.path.splitext( path )[1][1:]
+            format = path.split('/')[4].split('_')[:2]
+            format = "-".join( format )
+            formats.append({
+                'url': video_url,
+                'ext': extension,
+                'format': format,
+                'format_id': format,
+            })
+        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'formats': formats,
+            'age_limit': age_limit,
+        }

+ 65 - 0
youtube_dl/extractor/tube8.py

@@ -0,0 +1,65 @@
+import os
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    compat_urllib_request,
+    compat_urllib_parse,
+    unescapeHTML,
+)
+from ..aes import (
+    aes_decrypt_text
+)
+
+class Tube8IE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
+    _TEST = {
+        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
+        u'file': u'229795.mp4',
+        u'md5': u'e9e0b0c86734e5e3766e653509475db0',
+        u'info_dict': {
+            u"description": u"hot teen Kasia grinding", 
+            u"uploader": u"unknown", 
+            u"title": u"Kasia music video",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        video_title = self._html_search_regex(r'videotitle	="([^"]+)', webpage, u'title')
+        video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
+        video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = thumbnail.replace('\\/', '/')
+
+        video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
+        if webpage.find('"encrypted":true')!=-1:
+            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
+            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        path = compat_urllib_parse_urlparse( video_url ).path
+        extension = os.path.splitext( path )[1][1:]
+        format = path.split('/')[4].split('_')[:2]
+        format = "-".join( format )
+
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'description': video_description,
+            'url': video_url,
+            'ext': extension,
+            'format': format,
+            'format_id': format,
+            'age_limit': 18,
+        }

+ 37 - 15
youtube_dl/extractor/xhamster.py

@@ -36,21 +36,25 @@ class XHamsterIE(InfoExtractor):
     }]
     }]
 
 
     def _real_extract(self,url):
     def _real_extract(self,url):
+        def extract_video_url(webpage):
+            mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
+            if mobj is None:
+                raise ExtractorError(u'Unable to extract media URL')
+            if len(mobj.group('server')) == 0:
+                return compat_urllib_parse.unquote(mobj.group('file'))
+            else:
+                return mobj.group('server')+'/key='+mobj.group('file')
+
+        def is_hd(webpage):
+            return webpage.find('<div class=\'icon iconHD\'>') != -1
+
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
 
 
         video_id = mobj.group('id')
         video_id = mobj.group('id')
         seo = mobj.group('seo')
         seo = mobj.group('seo')
-        mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
+        mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
         webpage = self._download_webpage(mrss_url, video_id)
         webpage = self._download_webpage(mrss_url, video_id)
 
 
-        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract media URL')
-        if len(mobj.group('server')) == 0:
-            video_url = compat_urllib_parse.unquote(mobj.group('file'))
-        else:
-            video_url = mobj.group('server')+'/key='+mobj.group('file')
-
         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
             webpage, u'title')
             webpage, u'title')
 
 
@@ -76,14 +80,32 @@ class XHamsterIE(InfoExtractor):
 
 
         age_limit = self._rta_search(webpage)
         age_limit = self._rta_search(webpage)
 
 
-        return [{
-            'id':       video_id,
-            'url':      video_url,
-            'ext':      determine_ext(video_url),
-            'title':    video_title,
+        video_url = extract_video_url(webpage)
+        hd = is_hd(webpage)
+        formats = [{
+            'url': video_url,
+            'ext': determine_ext(video_url),
+            'format': 'hd' if hd else 'sd',
+            'format_id': 'hd' if hd else 'sd',
+        }]
+        if not hd:
+            webpage = self._download_webpage(mrss_url+'?hd', video_id)
+            if is_hd(webpage):
+                video_url = extract_video_url(webpage)
+                formats.append({
+                    'url': video_url,
+                    'ext': determine_ext(video_url),
+                    'format': 'hd',
+                    'format_id': 'hd',
+                })
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'formats': formats,
             'description': video_description,
             'description': video_description,
             'upload_date': video_upload_date,
             'upload_date': video_upload_date,
             'uploader_id': video_uploader_id,
             'uploader_id': video_uploader_id,
             'thumbnail': video_thumbnail,
             'thumbnail': video_thumbnail,
             'age_limit': age_limit,
             'age_limit': age_limit,
-        }]
+        }

+ 6 - 2
youtube_dl/extractor/youjizz.py

@@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):
         u'file': u'2189178.flv',
         u'file': u'2189178.flv',
         u'md5': u'07e15fa469ba384c7693fd246905547c',
         u'md5': u'07e15fa469ba384c7693fd246905547c',
         u'info_dict': {
         u'info_dict': {
-            u"title": u"Zeichentrick 1"
+            u"title": u"Zeichentrick 1",
+            u"age_limit": 18,
         }
         }
     }
     }
 
 
@@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor):
         # Get webpage content
         # Get webpage content
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
 
 
+        age_limit = self._rta_search(webpage)
+
         # Get the video title
         # Get the video title
         video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
         video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
             webpage, u'title').strip()
             webpage, u'title').strip()
@@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor):
                 'title': video_title,
                 'title': video_title,
                 'ext': 'flv',
                 'ext': 'flv',
                 'format': 'flv',
                 'format': 'flv',
-                'player_url': embed_page_url}
+                'player_url': embed_page_url,
+                'age_limit': age_limit}
 
 
         return [info]
         return [info]

+ 28 - 53
youtube_dl/extractor/youporn.py

@@ -17,7 +17,7 @@ from ..aes import (
 )
 )
 
 
 class YouPornIE(InfoExtractor):
 class YouPornIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
     _TEST = {
     _TEST = {
         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         u'file': u'505835.mp4',
         u'file': u'505835.mp4',
@@ -31,23 +31,10 @@ class YouPornIE(InfoExtractor):
         }
         }
     }
     }
 
 
-    def _print_formats(self, formats):
-        """Print all available formats"""
-        print(u'Available formats:')
-        print(u'ext\t\tformat')
-        print(u'---------------------------------')
-        for format in formats:
-            print(u'%s\t\t%s'  % (format['ext'], format['format']))
-
-    def _specific(self, req_format, formats):
-        for x in formats:
-            if x["format"] == req_format:
-                return x
-        return None
-
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')
         video_id = mobj.group('videoid')
+        url = 'http://www.' + mobj.group('url')
 
 
         req = compat_urllib_request.Request(url)
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
         req.add_header('Cookie', 'age_verified=1')
@@ -71,27 +58,22 @@ class YouPornIE(InfoExtractor):
         except KeyError:
         except KeyError:
             raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
             raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
 
 
-        # Get all of the formats available
+        # Get all of the links from the page
         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
         download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
         download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
             webpage, u'download list').strip()
             webpage, u'download list').strip()
-
-        # Get all of the links from the page
-        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
+        LINK_RE = r'<a href="([^"]+)">'
         links = re.findall(LINK_RE, download_list_html)
         links = re.findall(LINK_RE, download_list_html)
-        
-        # Get link of hd video if available
-        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
-        if mobj != None:
-            encrypted_video_url = mobj.group(u'encrypted_video_url')
-            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
-            links = [video_url] + links
+
+        # Get all encrypted links
+        encrypted_links = re.findall(r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage)
+        for encrypted_link in encrypted_links:
+            link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
+            links.append(link)
         
         
         if not links:
         if not links:
             raise ExtractorError(u'ERROR: no known formats available for video')
             raise ExtractorError(u'ERROR: no known formats available for video')
 
 
-        self.to_screen(u'Links found: %d' % len(links))
-
         formats = []
         formats = []
         for link in links:
         for link in links:
 
 
@@ -103,39 +85,32 @@ class YouPornIE(InfoExtractor):
             path = compat_urllib_parse_urlparse( video_url ).path
             path = compat_urllib_parse_urlparse( video_url ).path
             extension = os.path.splitext( path )[1][1:]
             extension = os.path.splitext( path )[1][1:]
             format = path.split('/')[4].split('_')[:2]
             format = path.split('/')[4].split('_')[:2]
+
             # size = format[0]
             # size = format[0]
             # bitrate = format[1]
             # bitrate = format[1]
             format = "-".join( format )
             format = "-".join( format )
             # title = u'%s-%s-%s' % (video_title, size, bitrate)
             # title = u'%s-%s-%s' % (video_title, size, bitrate)
 
 
             formats.append({
             formats.append({
-                'id': video_id,
                 'url': video_url,
                 'url': video_url,
-                'uploader': video_uploader,
-                'upload_date': upload_date,
-                'title': video_title,
                 'ext': extension,
                 'ext': extension,
                 'format': format,
                 'format': format,
-                'thumbnail': thumbnail,
-                'description': video_description,
-                'age_limit': age_limit,
+                'format_id': format,
             })
             })
 
 
-        if self._downloader.params.get('listformats', None):
-            self._print_formats(formats)
-            return
-
-        req_format = self._downloader.params.get('format', 'best')
-        self.to_screen(u'Format: %s' % req_format)
-
-        if req_format is None or req_format == 'best':
-            return [formats[0]]
-        elif req_format == 'worst':
-            return [formats[-1]]
-        elif req_format in ('-1', 'all'):
-            return formats
-        else:
-            format = self._specific( req_format, formats )
-            if format is None:
-                raise ExtractorError(u'Requested format not available')
-            return [format]
+        # Sort and remove doubles
+        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+        for i in range(len(formats)-1,0,-1):
+            if formats[i]['format_id'] == formats[i-1]['format_id']:
+                del formats[i]
+        
+        return {
+            'id': video_id,
+            'uploader': video_uploader,
+            'upload_date': upload_date,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'description': video_description,
+            'age_limit': age_limit,
+            'formats': formats,
+        }