Browse Source

Merge remote-tracking branch 'yasoob/master'

Philipp Hagemeister 12 years ago
parent
commit
d746cd88c2
3 changed files with 43 additions and 0 deletions
  1. 9 0
      test/tests.json
  2. 2 0
      youtube_dl/extractor/__init__.py
  3. 32 0
      youtube_dl/extractor/tudou.py

+ 9 - 0
test/tests.json

@@ -686,5 +686,14 @@
         "upload_date": "20130624",
         "upload_date": "20130624",
         "uploader": "Hurts"
         "uploader": "Hurts"
     }
     }
+  },
+  {
+    "name": "Tudou",
+    "url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html",
+    "file": "159447792.f4v",
+    "md5": "ad7c358a01541e926a1e413612c6b10a",
+    "info_dict": {
+        "title": "卡马乔国足开大脚长传冲吊集锦"
+    }
   }
   }
 ]
 ]

+ 2 - 0
youtube_dl/extractor/__init__.py

@@ -58,6 +58,7 @@ from .youku import YoukuIE
 from .youporn import YouPornIE
 from .youporn import YouPornIE
 from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
 from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
 from .zdf import ZDFIE
 from .zdf import ZDFIE
+from .tudou import TudouIE
 
 
 def gen_extractors():
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     """ Return a list of an instance of every supported extractor.
@@ -129,6 +130,7 @@ def gen_extractors():
         BreakIE(),
         BreakIE(),
         VevoIE(),
         VevoIE(),
         JukeboxIE(),
         JukeboxIE(),
+        TudouIE(),
         GenericIE()
         GenericIE()
     ]
     ]
 
 

+ 32 - 0
youtube_dl/extractor/tudou.py

@@ -0,0 +1,32 @@
+import re
+
+from .common import InfoExtractor
+
+
+class TudouIE(InfoExtractor):
+    """Extract a Tudou video by scraping the watch page, then the v2.tudou.com info document."""
+    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
+
+    @staticmethod
+    def _first_group(patterns, text, what):
+        """Return group 1 of the first pattern found in text; raise ValueError naming `what` if none is."""
+        for pattern in patterns:
+            found = re.search(pattern, text)
+            if found is not None:
+                return found.group(1)
+        raise ValueError('Tudou: unable to extract %s' % what)
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict (id, url, ext, title, thumbnail) for url."""
+        mobj = re.match(self._VALID_URL, url)
+        page_id = mobj.group(2).replace('.html', '')  # used only as the id argument of the first download
+        webpage = self._download_webpage(url, page_id)
+        video_id = self._first_group([r'"k":(.+?),'], webpage, 'video id')
+        # Two quoting/spacing variants are accepted for each field; patterns are tried in order.
+        title = self._first_group([r',kw:"(.+)"', r",kw: '(.+)'"], webpage, 'title')
+        thumbnail_url = self._first_group([r",pic: '(.+?)'", r',pic:"(.+?)"'], webpage, 'thumbnail')
+        info_page = self._download_webpage('http://v2.tudou.com/f?id=' + video_id, video_id, "Opening the info webpage")
+        final_url = self._first_group([r'>(.+?)</f>'], info_page, 'video URL')
+        ext = final_url.split('?')[0].split('.')[-1]  # text after the last dot, query string removed
+        return [{'id': video_id, 'url': final_url, 'ext': ext,
+                 'title': title, 'thumbnail': thumbnail_url}]