Browse Source

Add an extractor for southparkstudios.com (closes #1434)

It uses the MTV system
Jaime Marquínez Ferrándiz 12 years ago
parent
commit
5a6fecc3de
2 changed files with 35 additions and 0 deletions
  1. 1 0
      youtube_dl/extractor/__init__.py
  2. 34 0
      youtube_dl/extractor/southparkstudios.py

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -83,6 +83,7 @@ from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
+from .southparkstudios import SouthParkStudiosIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE

+ 34 - 0
youtube_dl/extractor/southparkstudios.py

@@ -0,0 +1,34 @@
+import re
+
+from .mtv import MTVIE, _media_xml_tag
+
+
+class SouthParkStudiosIE(MTVIE):  # southparkstudios.com clip extractor built on MTVIE
+    IE_NAME = u'southparkstudios.com'
+    _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)'  # only /clips/<numeric id> URLs match
+
+    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'  # NOTE(review): presumably read by MTVIE to fetch the feed - confirm
+
+    _TEST = {
+        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
+        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
+        u'info_dict': {
+            u'title': u'Bat Daded',
+            u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
+        },
+    }
+
+    # Replace MTVIE's _TESTS with an empty list; this class declares its own _TEST above
+    _TESTS = []
+
+    def _get_thumbnail_url(self, uri, itemdoc):  # returns the thumbnail URL found in itemdoc; uri is not used here
+        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))  # thumbnail tag nested under the group tag
+        return itemdoc.find(search_path).attrib['url']  # NOTE(review): if find() can return None here, this raises AttributeError - confirm
+
+    def _real_extract(self, url):  # resolves a clip page URL to its mgid and hands that to _get_videos_info
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')  # numeric clip id captured from the URL
+        webpage = self._download_webpage(url, video_id)
+        mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',  # NOTE(review): the '.' in 'swfobject.embedSWF' is unescaped, so it matches any character
+                                  webpage, u'mgid')
+        return self._get_videos_info(mgid)  # not defined in this class; presumably inherited from MTVIE