Browse Source

Added an IE for todou

M.Yasoob Khalid 12 years ago
parent
commit
9caa687d81
2 changed files with 34 additions and 0 deletions
  1. 2 0
      youtube_dl/extractor/__init__.py
  2. 32 0
      youtube_dl/extractor/tudou.py

+ 2 - 0
youtube_dl/extractor/__init__.py

@@ -58,6 +58,7 @@ from .youku import YoukuIE
 from .youporn import YouPornIE
 from .youporn import YouPornIE
 from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
 from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
 from .zdf import ZDFIE
 from .zdf import ZDFIE
+from .tudou import TudouIE
 
 
 def gen_extractors():
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     """ Return a list of an instance of every supported extractor.
@@ -129,6 +130,7 @@ def gen_extractors():
         BreakIE(),
         BreakIE(),
         VevoIE(),
         VevoIE(),
         JukeboxIE(),
         JukeboxIE(),
+        TudouIE(),
         GenericIE()
         GenericIE()
     ]
     ]
 
 

+ 32 - 0
youtube_dl/extractor/tudou.py

@@ -0,0 +1,32 @@
+import re
+
+from .common import InfoExtractor
+
+
+class TudouIE(InfoExtractor):
+    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(2).replace('.html','')
+        webpage = self._download_webpage(url, video_id)
+        video_id = re.search('"k":(.+?),',webpage).group(1)
+        title = re.search(",kw:\"(.+)\"",webpage)
+        if title is None:
+            title = re.search(",kw: \'(.+)\'",webpage)
+        title = title.group(1)
+        thumbnail_url = re.search(",pic: \'(.+?)\'",webpage)
+        if thumbnail_url is None:
+            thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
+        thumbnail_url = thumbnail_url.group(1)
+        info_url = "http://v2.tudou.com/f?id="+str(video_id)
+        webpage = self._download_webpage(info_url, video_id, "Opening the info webpage")
+        final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1)
+        ext = (final_url.split('?')[0]).split('.')[-1]
+        return [{
+            'id':        video_id,
+            'url':       final_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
+        }]