|
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
|
|
import re
|
|
import re
|
|
|
|
|
|
from .common import InfoExtractor
|
|
from .common import InfoExtractor
|
|
-from ..utils import ExtractorError
|
|
|
|
|
|
+from ..utils import (
|
|
|
|
+ ExtractorError,
|
|
|
|
+ parse_filesize,
|
|
|
|
+ qualities,
|
|
|
|
+)
|
|
|
|
|
|
|
|
|
|
class Channel9IE(InfoExtractor):
|
|
class Channel9IE(InfoExtractor):
|
|
@@ -52,23 +56,6 @@ class Channel9IE(InfoExtractor):
|
|
|
|
|
|
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
|
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
|
|
|
|
|
- # Sorted by quality
|
|
|
|
- _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
|
|
|
|
-
|
|
|
|
- def _restore_bytes(self, formatted_size):
|
|
|
|
- if not formatted_size:
|
|
|
|
- return 0
|
|
|
|
- m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
|
|
|
|
- if not m:
|
|
|
|
- return 0
|
|
|
|
- units = m.group('units')
|
|
|
|
- try:
|
|
|
|
- exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
|
|
|
|
- except ValueError:
|
|
|
|
- return 0
|
|
|
|
- size = float(m.group('size'))
|
|
|
|
- return int(size * (1024 ** exponent))
|
|
|
|
-
|
|
|
|
def _formats_from_html(self, html):
|
|
def _formats_from_html(self, html):
|
|
FORMAT_REGEX = r'''
|
|
FORMAT_REGEX = r'''
|
|
(?x)
|
|
(?x)
|
|
@@ -78,16 +65,20 @@ class Channel9IE(InfoExtractor):
|
|
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
|
|
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
|
|
</div>)? # File size part may be missing
|
|
</div>)? # File size part may be missing
|
|
'''
|
|
'''
|
|
- # Extract known formats
|
|
|
|
|
|
+ quality = qualities((
|
|
|
|
+ 'MP3', 'MP4',
|
|
|
|
+ 'Low Quality WMV', 'Low Quality MP4',
|
|
|
|
+ 'Mid Quality WMV', 'Mid Quality MP4',
|
|
|
|
+ 'High Quality WMV', 'High Quality MP4'))
|
|
formats = [{
|
|
formats = [{
|
|
'url': x.group('url'),
|
|
'url': x.group('url'),
|
|
'format_id': x.group('quality'),
|
|
'format_id': x.group('quality'),
|
|
'format_note': x.group('note'),
|
|
'format_note': x.group('note'),
|
|
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
|
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
|
- 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
|
|
|
|
- 'preference': self._known_formats.index(x.group('quality')),
|
|
|
|
|
|
+ 'filesize_approx': parse_filesize(x.group('filesize')),
|
|
|
|
+ 'quality': quality(x.group('quality')),
|
|
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
|
|
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
|
|
- } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
|
|
|
|
|
|
+ } for x in list(re.finditer(FORMAT_REGEX, html))]
|
|
|
|
|
|
self._sort_formats(formats)
|
|
self._sort_formats(formats)
|
|
|
|
|