12 years ago · ef4fd84857
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -488,7 +488,8 @@ class YoutubeDL(object):
                 new_result = ie_result.copy()
                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                           'entries', 'urlhandle', 'ie_key', 'duration',
-                          'subtitles', 'annotations', 'format'):
+                          'subtitles', 'annotations', 'format',
+                          'thumbnail', 'thumbnails'):
                     if f in new_result:
                         del new_result[f]
                     if f in embedded_info:
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -178,6 +178,7 @@ from .wat import WatIE
 from .websurg import WeBSurgIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
+from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
         #   Site Name | Video Title
         #   Video Title - Tagline | Site Name
         # and so on and so forth; it's just not practical
-        video_title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'video title', default=u'video', flags=re.DOTALL)
+        video_title = self._html_search_regex(
+            r'(?s)<title>(.*?)</title>', webpage, u'video title',
+            default=u'video')
+
+        # video uploader is domain name
+        video_uploader = self._search_regex(
+            r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
 
         # Look for BrightCove:
         bc_url = BrightcoveIE._extract_brightcove_url(webpage)
@@ -188,7 +193,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded YouTube player
         matches = re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
         if matches:
             urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
                      for tuppl in matches]
@@ -197,13 +202,26 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded Dailymotion player
         matches = re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
         if matches:
             urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
                      for tuppl in matches]
             return self.playlist_result(
                 urlrs, playlist_id=video_id, playlist_title=video_title)
 
+        # Look for embedded Wistia player
+        match = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+        if match:
+            return {
+                '_type': 'url_transparent',
+                'url': unescapeHTML(match.group('url')),
+                'ie_key': 'Wistia',
+                'uploader': video_uploader,
+                'title': video_title,
+                'id': video_id,
+            }
+
         # Look for Bandcamp pages with custom domain
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         if mobj is not None:
@@ -247,14 +265,9 @@ class GenericIE(InfoExtractor):
         # here's a fun little line of code for you:
         video_id = os.path.splitext(video_id)[0]
 
-        # video uploader is domain name
-        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
-            url, u'video uploader')
-
         return {
             'id':       video_id,
             'url':      video_url,
             'uploader': video_uploader,
-            'upload_date':  None,
             'title':    video_title,
         }
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -0,0 +1,55 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class WistiaIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
+
+    _TEST = {
+        u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
+        u"file": u"sh7fpupwlt.mov",
+        u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
+        u"info_dict": {
+            u"title": u"cfh_resourceful_zdkh_final_1"
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        data_json = self._html_search_regex(
+            r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data')
+
+        data = json.loads(data_json)
+
+        formats = []
+        thumbnails = []
+        for atype, a in data['assets'].items():
+            if atype == 'still':
+                thumbnails.append({
+                    'url': a['url'],
+                    'resolution': '%dx%d' % (a['width'], a['height']),
+                })
+                continue
+            if atype == 'preview':
+                continue
+            formats.append({
+                'format_id': atype,
+                'url': a['url'],
+                'width': a['width'],
+                'height': a['height'],
+                'filesize': a['size'],
+                'ext': a['ext'],
+            })
+        formats.sort(key=lambda a: a['filesize'])
+
+        return {
+            'id': video_id,
+            'title': data['name'],
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }