Browse Source

[vporn] Make video URL regex more strict

There is a garbage instead of proper URL for some HD videos
Sergey M․ 11 years ago
parent
commit
59d284c316
1 changed files with 43 additions and 17 deletions
  1. 43 17
      youtube_dl/extractor/vporn.py

+ 43 - 17
youtube_dl/extractor/vporn.py

@@ -11,22 +11,48 @@ from ..utils import (
 
 
 class VpornIE(InfoExtractor):
 class VpornIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
     _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
-        'md5': 'facf37c1b86546fa0208058546842c55',
-        'info_dict': {
-            'id': '497944',
-            'display_id': 'violet-on-her-th-birthday',
-            'ext': 'mp4',
-            'title': 'Violet on her 19th birthday',
-            'description': 'Violet dances in front of the camera which is sure to get you horny.',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'kileyGrope',
-            'categories': ['Masturbation', 'Teen'],
-            'duration': 393,
-            'age_limit': 18,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
+            'md5': 'facf37c1b86546fa0208058546842c55',
+            'info_dict': {
+                'id': '497944',
+                'display_id': 'violet-on-her-th-birthday',
+                'ext': 'mp4',
+                'title': 'Violet on her 19th birthday',
+                'description': 'Violet dances in front of the camera which is sure to get you horny.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'kileyGrope',
+                'categories': ['Masturbation', 'Teen'],
+                'duration': 393,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+        {
+            'url': 'http://www.vporn.com/female/hana-shower/523564/',
+            'md5': 'ced35a4656198a1664cf2cda1575a25f',
+            'info_dict': {
+                'id': '523564',
+                'display_id': 'hana-shower',
+                'ext': 'mp4',
+                'title': 'Hana Shower',
+                'description': 'Hana showers at the bathroom.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'Hmmmmm',
+                'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
+                'duration': 588,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+    ]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
@@ -64,7 +90,7 @@ class VpornIE(InfoExtractor):
 
 
         formats = []
         formats = []
 
 
-        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
+        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
             video_url = video[1]
             video_url = video[1]
             fmt = {
             fmt = {
                 'url': video_url,
                 'url': video_url,