浏览代码

Move DailyMotion into its own file

Philipp Hagemeister 12 年之前
父节点
当前提交
219b8130df
共有 2 个文件被更改,包括 78 次插入和 68 次删除
  1. 1 68
      youtube_dl/InfoExtractors.py
  2. 77 0
      youtube_dl/extractor/dailymotion.py

+ 1 - 68
youtube_dl/InfoExtractors.py

@@ -24,80 +24,13 @@ from .utils import *
 
 
 
 
 from .extractor.common import InfoExtractor, SearchInfoExtractor
 from .extractor.common import InfoExtractor, SearchInfoExtractor
+from .extractor.dailymotion import DailymotionIE
 from .extractor.metacafe import MetacafeIE
 from .extractor.metacafe import MetacafeIE
 from .extractor.statigram import StatigramIE
 from .extractor.statigram import StatigramIE
 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeUserIE, YoutubeChannelIE
 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeUserIE, YoutubeChannelIE
 
 
 
 
 
 
-class DailymotionIE(InfoExtractor):
-    """Information Extractor for Dailymotion"""
-
-    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
-    IE_NAME = u'dailymotion'
-
-    def _real_extract(self, url):
-        # Extract id and simplified title from URL
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-
-        video_id = mobj.group(1).split('_')[0].split('?')[0]
-
-        video_extension = 'mp4'
-
-        # Retrieve video webpage to extract further information
-        request = compat_urllib_request.Request(url)
-        request.add_header('Cookie', 'family_filter=off')
-        webpage = self._download_webpage(request, video_id)
-
-        # Extract URL, uploader and title from webpage
-        self.report_extraction(video_id)
-        mobj = re.search(r'\s*var flashvars = (.*)', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract media URL')
-        flashvars = compat_urllib_parse.unquote(mobj.group(1))
-
-        for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
-            if key in flashvars:
-                max_quality = key
-                self.to_screen(u'Using %s' % key)
-                break
-        else:
-            raise ExtractorError(u'Unable to extract video URL')
-
-        mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video URL')
-
-        video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
-
-        # TODO: support choosing qualities
-
-        mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = unescapeHTML(mobj.group('title'))
-
-        video_uploader = None
-        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
-                                             # Looking for official user
-                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
-                                            webpage, 'video uploader')
-
-        video_upload_date = None
-        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
-        if mobj is not None:
-            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
-
-        return [{
-            'id':       video_id,
-            'url':      video_url,
-            'uploader': video_uploader,
-            'upload_date':  video_upload_date,
-            'title':    video_title,
-            'ext':      video_extension,
-        }]
 
 
 
 
 class PhotobucketIE(InfoExtractor):
 class PhotobucketIE(InfoExtractor):

+ 77 - 0
youtube_dl/extractor/dailymotion.py

@@ -0,0 +1,77 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    compat_urllib_parse,
+
+    ExtractorError,
+    unescapeHTML,
+)
+
+class DailymotionIE(InfoExtractor):
+    """Information Extractor for Dailymotion"""
+
+    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
+    IE_NAME = u'dailymotion'
+
+    def _real_extract(self, url):
+        # Extract id and simplified title from URL
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group(1).split('_')[0].split('?')[0]
+
+        video_extension = 'mp4'
+
+        # Retrieve video webpage to extract further information
+        request = compat_urllib_request.Request(url)
+        request.add_header('Cookie', 'family_filter=off')
+        webpage = self._download_webpage(request, video_id)
+
+        # Extract URL, uploader and title from webpage
+        self.report_extraction(video_id)
+        mobj = re.search(r'\s*var flashvars = (.*)', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        flashvars = compat_urllib_parse.unquote(mobj.group(1))
+
+        for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
+            if key in flashvars:
+                max_quality = key
+                self.to_screen(u'Using %s' % key)
+                break
+        else:
+            raise ExtractorError(u'Unable to extract video URL')
+
+        mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract video URL')
+
+        video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
+
+        # TODO: support choosing qualities
+
+        mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract title')
+        video_title = unescapeHTML(mobj.group('title'))
+
+        video_uploader = None
+        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
+                                             # Looking for official user
+                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
+                                            webpage, 'video uploader')
+
+        video_upload_date = None
+        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
+        if mobj is not None:
+            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
+
+        return [{
+            'id':       video_id,
+            'url':      video_url,
+            'uploader': video_uploader,
+            'upload_date':  video_upload_date,
+            'title':    video_title,
+            'ext':      video_extension,
+        }]