Browse Source

The Daily Show Podcast support

felix 10 years ago
parent
commit
2e90dff2c2

+ 2 - 1
youtube_dl/extractor/__init__.py

@@ -84,7 +84,7 @@ from .cnn import (
 )
 )
 from .collegehumor import CollegeHumorIE
 from .collegehumor import CollegeHumorIE
 from .collegerama import CollegeRamaIE
 from .collegerama import CollegeRamaIE
-from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE, TheDailyShowPodcastIE
 from .comcarcoff import ComCarCoffIE
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .condenast import CondeNastIE
@@ -250,6 +250,7 @@ from .letv import (
     LetvPlaylistIE
     LetvPlaylistIE
 )
 )
 from .lifenews import LifeNewsIE
 from .lifenews import LifeNewsIE
+from .libsyn import LibsynIE
 from .liveleak import LiveLeakIE
 from .liveleak import LiveLeakIE
 from .livestream import (
 from .livestream import (
     LivestreamIE,
     LivestreamIE,

+ 21 - 0
youtube_dl/extractor/comedycentral.py

@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 
 import re
 import re
 
 
+from .common import InfoExtractor
 from .mtv import MTVServicesInfoExtractor
 from .mtv import MTVServicesInfoExtractor
 from ..compat import (
 from ..compat import (
     compat_str,
     compat_str,
@@ -272,3 +273,23 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
             'title': show_name + ' ' + title,
             'title': show_name + ' ' + title,
             'description': description,
             'description': description,
         }
         }
+
+class TheDailyShowPodcastIE(InfoExtractor):
+    """Resolve a Daily Show podcast page to the Libsyn player it embeds."""
+
+    _VALID_URL = r'(?P<scheme>https?:)?//thedailyshow\.cc\.com/podcast/(?P<id>[a-z\-]+)'
+
+    def _real_extract(self, url):
+        """Return a url_transparent result pointing at the embedded player URL."""
+        slug = self._match_id(url)
+        page = self._download_webpage(url, slug)
+
+        embed_url = self._search_regex(r'<iframe(?:\s+[^>]+)?\s*src="((?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/[0-9]+)', page, 'player URL')
+
+        # A protocol-relative embed inherits the page URL's scheme, or
+        # 'https:' when the page URL itself was given without one.
+        if embed_url.startswith('//'):
+            page_scheme = re.match(self._VALID_URL, url).group('scheme')
+            embed_url = (page_scheme or 'https:') + embed_url
+
+        return {'_type': 'url_transparent', 'url': embed_url}

+ 41 - 0
youtube_dl/extractor/libsyn.py

@@ -0,0 +1,41 @@
+# encoding: utf-8
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+)
+
+class LibsynIE(InfoExtractor):
+    """Extract a podcast episode from a Libsyn HTML5 player embed page."""
+
+    _VALID_URL = r'(?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)(?:/.*)?'
+
+    def _real_extract(self, url):
+        """Build the info dict for one episode.
+
+        The episode title and both media URLs are required; a missing
+        release date or description is left as None instead of aborting.
+        """
+        if url.startswith('//'):
+            # Protocol-relative embed URLs are fetched over HTTPS.
+            url = 'https:' + url
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        podcast_episode_title = self._search_regex(r'<h3>(.*?)</h3>', webpage, 'episode title')
+        release = self._search_regex(r'<div class="release_date">Released: (.*?)</div>', webpage, 'release date', fatal=False)
+        # (?s): the description body may span several lines.
+        podcast_description = self._search_regex(r'(?s)<div id="info_text_body">(.*?)</div>', webpage, 'description', fatal=False)
+
+        # [^"]+ cannot run past the closing quote the way a greedy .* could.
+        url0 = self._search_regex(r'var mediaURLLibsyn = "(?P<url0>https?://[^"]+)";', webpage, 'first media URL')
+        url1 = self._search_regex(r'var mediaURL = "(?P<url1>https?://[^"]+)";', webpage, 'second media URL')
+        # Offer the second URL only when it differs from the first.
+        media_urls = [url0] if url0 == url1 else [url0, url1]
+
+        return {
+            'id': display_id,
+            'title': podcast_episode_title,
+            'description': podcast_description,
+            'upload_date': unified_strdate(release) if release else None,
+            'formats': [{'url': media_url} for media_url in media_urls],
+        }