소스 검색

[Bigflix] Add new extractor for bigflix.com

Add an IE (info extractor) to support bigflix.com. The site uses some
sort of Silverlight plugin whose video URL is populated from
base64-encoded flashvars, so it is quite straightforward to extract.
Vignesh Venkat 9 년 전
부모
커밋
0a899a1448
2개의 변경된 파일 43개의 추가작업 그리고 0개의 파일을 삭제
  1. 1 0
      youtube_dl/extractor/__init__.py
  2. 42 0
      youtube_dl/extractor/bigflix.py

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -61,6 +61,7 @@ from .beeg import BeegIE
 from .behindkink import BehindKinkIE
 from .behindkink import BehindKinkIE
 from .beatportpro import BeatportProIE
 from .beatportpro import BeatportProIE
 from .bet import BetIE
 from .bet import BetIE
+from .bigflix import BigflixIE
 from .bild import BildIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .bilibili import BiliBiliIE
 from .bleacherreport import (
 from .bleacherreport import (

+ 42 - 0
youtube_dl/extractor/bigflix.py

@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from base64 import b64decode
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class BigflixIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.*/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
+        'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
+        'info_dict': {
+            'id': '16537',
+            'ext': 'mp4',
+            'title': 'Singham Returns',
+            'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
+            webpage, 'title')
+
+        video_url = b64decode(compat_urllib_parse_unquote(self._search_regex(
+            r'file=([^&]+)', webpage, 'video url')).encode('ascii')).decode('utf-8')
+
+        description = self._html_search_meta('description', webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'description': description,
+        }