9 år sedan · 315bdae00a
--- a/youtube_dl/extractor/zippcast.py
+++ b/youtube_dl/extractor/zippcast.py
@@ -1,58 +1,94 @@
 
				-# coding: utf-8
			
 
				 from __future__ import unicode_literals
			
 
				 
			
 
				+import re
			
 
				+
			
 
				 from .common import InfoExtractor
			
 
				+from ..utils import (
			
 
				+    determine_ext,
			
 
				+    str_to_int,
			
 
				+)
			
 
				 
			
 
				 
			
 
				 class ZippCastIE(InfoExtractor):
			
 
				-    _VALID_URL = r'https?://(?:www\.)?zippcast\.com/video/(?P<id>[0-9a-zA-Z]+)'
			
 
				+    _VALID_URL = r'https?://(?:www\.)?zippcast\.com/(?:video/|videoview\.php\?.*\bvplay=)(?P<id>[0-9a-zA-Z]+)'
			
 
				     _TESTS = [{
			
 
				+        # m3u8, hq direct link
			
 
				         'url': 'http://www.zippcast.com/video/c9cfd5c7e44dbc29c81',
			
 
				-        'md5': 'f2aea8659962d9155031aaeac53f7c54',
			
 
				+        'md5': '5ea0263b5606866c4d6cda0fc5e8c6b6',
			
 
				         'info_dict': {
			
 
				             'id': 'c9cfd5c7e44dbc29c81',
			
 
				             'ext': 'mp4',
			
 
				             'title': '[Vinesauce] Vinny - Digital Space Traveler',
			
 
				+            'description': 'Muted on youtube, but now uploaded in it\'s original form.',
			
 
				             'thumbnail': 're:^https?://.*\.jpg$',
			
 
				             'uploader': 'vinesauce',
			
 
				-            'description': 'Muted on youtube, but now uploaded in it\'s original form.',
			
 
				-            'categories': ['Entertainment'],
			
 
				             'view_count': int,
			
 
				+            'categories': ['Entertainment'],
			
 
				+            'tags': list,
			
 
				         },
			
 
				     }, {
			
 
				+        # f4m, lq ipod direct link
			
 
				         'url': 'http://www.zippcast.com/video/b79c0a233e9c6581775',
			
 
				-        'md5': 'b8631f0cc48ed15387f9179988d0c97c',
			
 
				-        'info_dict': {
			
 
				-            'id': 'b79c0a233e9c6581775',
			
 
				-            'ext': 'mp4',
			
 
				-            'title': 'Battlefield Hardline Trailer',
			
 
				-            'thumbnail': 're:^https?://.*\.jpg$',
			
 
				-            'uploader': 'IGXGaming',
			
 
				-            'description': 'Battlefield Hardline Trailer',
			
 
				-            'categories': ['Gaming'],
			
 
				-            'view_count': int,
			
 
				-        },
			
 
				+        'only_matching': True,
			
 
				+    }, {
			
 
				+        'url': 'http://www.zippcast.com/videoview.php?vplay=c9cfd5c7e44dbc29c81&auto=no',
			
 
				+        'only_matching': True,
			
 
				     }]
			
 
				 
			
 
				     def _real_extract(self, url):
			
 
				         video_id = self._match_id(url)
			
 
				-        webpage = self._download_webpage(url, video_id)
			
 
				 
			
 
				-        title = self._html_search_regex(r'title="(.+?)"', webpage, 'title')
			
 
				-        uploader = self._html_search_regex(r'http://www.zippcast.com/profile/(.+?)">', webpage, 'uploader')
			
 
				-        url = self._html_search_regex(r'<source src="(.+?)" type="', webpage, 'url')
			
 
				-        description = self._html_search_regex(r'<span class="vdescr".+>(.+?)</span>', webpage, 'description')
			
 
				-        thumbnail = self._html_search_regex(r'poster="(.+?)" controls>', webpage, 'thumbnail')
			
 
				-        categories = self._html_search_regex(r'<a href="http://www.zippcast.com/categories/(.+?)"', webpage, 'categories')
			
 
				-        view_count = self._html_search_regex(r'<td align="right"><h3>(.+?) views!', webpage, 'view_count')
			
 
				+        webpage = self._download_webpage(
			
 
				+            'http://www.zippcast.com/video/%s' % video_id, video_id)
			
 
				+
			
 
				+        formats = []
			
 
				+        video_url = self._search_regex(
			
 
				+            r'<source[^>]+src=(["\'])(?P<url>.+?)\1', webpage,
			
 
				+            'video url', default=None, group='url')
			
 
				+        if video_url:
			
 
				+            formats.append({
			
 
				+                'url': video_url,
			
 
				+                'format_id': 'http',
			
 
				+                'preference': 0,  # direct link is almost always of worse quality
			
 
				+            })
			
 
				+        src_url = self._search_regex(
			
 
				+            r'src\s*:\s*(?:escape\()?(["\'])(?P<url>http://.+?)\1',
			
 
				+            webpage, 'src', default=None, group='url')
			
 
				+        ext = determine_ext(src_url)
			
 
				+        if ext == 'm3u8':
			
 
				+            formats.extend(self._extract_m3u8_formats(
			
 
				+                src_url, video_id, 'mp4', entry_protocol='m3u8_native',
			
 
				+                m3u8_id='hls', fatal=False))
			
 
				+        elif ext == 'f4m':
			
 
				+            formats.extend(self._extract_f4m_formats(
			
 
				+                src_url, video_id, f4m_id='hds', fatal=False))
			
 
				+        self._sort_formats(formats)
			
 
				+
			
 
				+        title = self._og_search_title(webpage)
			
 
				+        description = self._og_search_description(webpage) or self._html_search_meta(
			
 
				+            'description', webpage)
			
 
				+        uploader = self._search_regex(
			
 
				+            r'<a[^>]+href="https?://[^/]+/profile/[^>]+>([^<]+)</a>',
			
 
				+            webpage, 'uploader', fatal=False)
			
 
				+        thumbnail = self._og_search_thumbnail(webpage)
			
 
				+        view_count = str_to_int(self._search_regex(
			
 
				+            r'>([\d,.]+) views!', webpage, 'view count', fatal=False))
			
 
				+
			
 
				+        categories = re.findall(
			
 
				+            r'<a[^>]+href="https?://[^/]+/categories/[^"]+">([^<]+),?<',
			
 
				+            webpage)
			
 
				+        tags = re.findall(
			
 
				+            r'<a[^>]+href="https?://[^/]+/search/tags/[^"]+">([^<]+),?<',
			
 
				+            webpage)
			
 
				 
			
 
				         return {
			
 
				             'id': video_id,
			
 
				             'title': title,
			
 
				-            'url': url,
			
 
				             'description': description,
			
 
				-            'uploader': uploader,
			
 
				             'thumbnail': thumbnail,
			
 
				-            'categories': [categories],
			
 
				-            'view_count': int(view_count.replace(',', '')),
			
 
				+            'uploader': uploader,
			
 
				+            'view_count': view_count,
			
 
				+            'categories': categories,
			
 
				+            'tags': tags,
			
 
				+            'formats': formats,
			
 
				         }