12 years ago · 4d2ebb6bd7
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -3,10 +3,7 @@
 
				 import re
			
 
				 
			
 
				 from .common import InfoExtractor
			
 
				-from ..utils import (
			
 
				-    format_bytes,
			
 
				-    ExtractorError,
			
 
				-)
			
 
				+from ..utils import ExtractorError
			
 
				 
			
 
				 class Channel9IE(InfoExtractor):
			
 
				     '''
			
@@ -51,7 +48,6 @@ class Channel9IE(InfoExtractor):
 
				     ]
			
 
				 
			
 
				     _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
			
 
				-    _EXTRACT_ENTRY_ITEMS_FROM_RSS = False
			
 
				 
			
 
				     # Sorted by quality
			
 
				     _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
			
@@ -90,37 +86,6 @@ class Channel9IE(InfoExtractor):
 
				         formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
			
 
				         return formats
			
 
				 
			
 
				-    def _formats_from_rss_item(self, item):
			
 
				-
			
 
				-        def process_formats(elem):
			
 
				-            formats = []
			
 
				-            for media_content in elem.findall('./{http://search.yahoo.com/mrss/}content'):
			
 
				-                url = media_content.attrib['url']
			
 
				-                # Ignore unrelated media
			
 
				-                if url.endswith('.ism/manifest'):
			
 
				-                    continue
			
 
				-                format_note = media_content.attrib['type']
			
 
				-                filesize = int(media_content.attrib['fileSize'])
			
 
				-                formats.append({'url': url,
			
 
				-                                'format_note': format_note,
			
 
				-                                'format': '%s %s' % (format_note, format_bytes(filesize)),
			
 
				-                                'filesize': filesize,
			
 
				-                                })
			
 
				-            return formats
			
 
				-
			
 
				-        formats = []
			
 
				-
			
 
				-        for media_group in item.findall('./{http://search.yahoo.com/mrss/}group'):
			
 
				-            formats.extend(process_formats(media_group))
			
 
				-
			
 
				-        # Sometimes there are no media:groups in item, but there is media:content
			
 
				-        # right in item (usually when there is the only media source)
			
 
				-        formats.extend(process_formats(item))        
			
 
				-
			
 
				-        # Sort by file size
			
 
				-        formats.sort(key=lambda fmt: fmt['filesize'])
			
 
				-        return formats
			
 
				-
			
 
				     def _extract_title(self, html):
			
 
				         title = self._html_search_meta(u'title', html, u'title')
			
 
				         if title is None:           
			
@@ -274,61 +239,12 @@ class Channel9IE(InfoExtractor):
 
				 
			
 
				         return contents
			
 
				 
			
 
				-    def _extract_content_rss(self, rss):
			
 
				-        '''
			
 
				-        Extracts links to entry items right out of RSS feed.
			
 
				-        This approach is faster than extracting from web pages
			
 
				-        one by one, but suffers from some problems.
			
 
				-        Pros:
			
 
				-         - no need to download additional pages
			
 
				-         - provides more media links
			
 
				-         - accurate file size
			
 
				-        Cons:
			
 
				-         - fewer meta data provided
			
 
				-         - links to media files have no appropriate data that may be used as format_id
			
 
				-         - RSS does not contain links to presentation materials (slides, zip)
			
 
				-        '''
			
 
				-        entries = []
			
 
				-        for item in rss.findall('./channel/item'):
			
 
				-            url = item.find('./link').text
			
 
				-            video_id = url.split('/')[-1]
			
 
				-            formats = self._formats_from_rss_item(item)
			
 
				-
			
 
				-            if len(formats) == 0:
			
 
				-                self._downloader.report_warning(u'The recording for session %s is not yet available' % video_id)
			
 
				-                continue
			
 
				-
			
 
				-            title = item.find('./title').text
			
 
				-            description = item.find('./description').text
			
 
				-
			
 
				-            thumbnail = item.find('./{http://search.yahoo.com/mrss/}thumbnail').text
			
 
				-
			
 
				-            duration_e = item.find('./{http://www.itunes.com/dtds/podcast-1.0.dtd}duration')
			
 
				-            duration = duration_e.text if duration_e is not None else 0
			
 
				-
			
 
				-            speakers_e = item.find('./{http://purl.org/dc/elements/1.1/}creator')
			
 
				-            speakers = speakers_e.text.split(', ') if speakers_e is not None and speakers_e.text else []
			
 
				-
			
 
				-            entries.append({'_type': 'video',
			
 
				-                            'id': video_id,
			
 
				-                            'formats': formats,
			
 
				-                            'title': title,
			
 
				-                            'description': description,
			
 
				-                            'thumbnail': thumbnail,
			
 
				-                            'duration': duration,
			
 
				-                            'session_speakers': speakers,                            
			
 
				-                            })
			
 
				-        return entries
			
 
				-
			
 
				     def _extract_list(self, content_path):
			
 
				         rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
			
 
				-        if self._EXTRACT_ENTRY_ITEMS_FROM_RSS:   
			
 
				-            return self._extract_content_rss(rss)
			
 
				-        else:
			
 
				-            entries = [self.url_result(session_url.text, 'Channel9')
			
 
				-                       for session_url in rss.findall('./channel/item/link')]
			
 
				-            title_text = rss.find('./channel/title').text
			
 
				-            return self.playlist_result(entries, content_path, title_text)
			
 
				+        entries = [self.url_result(session_url.text, 'Channel9')
			
 
				+                   for session_url in rss.findall('./channel/item/link')]
			
 
				+        title_text = rss.find('./channel/title').text
			
 
				+        return self.playlist_result(entries, content_path, title_text)
			
 
				 
			
 
				     def _real_extract(self, url):
			
 
				         mobj = re.match(self._VALID_URL, url)