8 years ago · 21fbf0f955
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -1,7 +1,9 @@
 
				 # coding: utf-8
			
 
				 from __future__ import unicode_literals
			
 
				 
			
 
				+import functools
			
 
				 import itertools
			
 
				+import operator
			
 
				 # import os
			
 
				 import re
			
 
				 
			
@@ -129,9 +131,38 @@ class PornHubIE(InfoExtractor):
 
				 
			
 
				         tv_webpage = dl_webpage('tv')
			
 
				 
			
 
				-        video_url = self._search_regex(
			
 
				-            r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
			
 
				-            'video url', group='url')
			
 
				+        encoded_url = self._search_regex(r'(var.*mediastring.*)</script>',
			
 
				+            tv_webpage, 'encoded url')
			
 
				+        assignments = encoded_url.split(";")
			
 
				+        js_vars = {}
			
 
				+
			
 
				+        def parse_js_value(inp):
			
 
				+            inp = re.sub(r'/\*[^*]*\*/', "", inp)
			
 
				+
			
 
				+            if "+" in inp:
			
 
				+                inps = inp.split("+")
			
 
				+                return functools.reduce(operator.concat, map(parse_js_value, inps))
			
 
				+
			
 
				+            inp = inp.strip()
			
 
				+            if inp in js_vars:
			
 
				+                return js_vars[inp]
			
 
				+
			
 
				+            # Hope it's a string!
			
 
				+            assert inp.startswith('"') and inp.endswith('"')
			
 
				+            return inp[1:-1]
			
 
				+
			
 
				+        for assn in assignments:
			
 
				+            assn = assn.strip()
			
 
				+            if len(assn) == 0:
			
 
				+                continue
			
 
				+
			
 
				+            assert assn.startswith("var ")
			
 
				+            assn = assn[4:]
			
 
				+            vname, value = assn.split("=", 1)
			
 
				+
			
 
				+            js_vars[vname] = parse_js_value(value)
			
 
				+
			
 
				+        video_url = js_vars["mediastring"]
			
 
				 
			
 
				         title = self._search_regex(
			
 
				             r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)