Browse Source

Merge branch 'master' of github.com:rg3/youtube-dl

Philipp Hagemeister 10 năm trước cách đây
mục cha
commit
71e7da6533

+ 1 - 0
AUTHORS

@@ -105,3 +105,4 @@ Dinesh S
 Johan K. Jensen
 Yen Chi Hsuan
 Enam Mijbah Noor
+David Luhmer

+ 23 - 0
youtube_dl/extractor/common.py

@@ -14,6 +14,7 @@ import xml.etree.ElementTree
 
 from ..compat import (
     compat_cookiejar,
+    compat_HTTPError,
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
@@ -26,6 +27,7 @@ from ..utils import (
     compiled_regex_type,
     ExtractorError,
     float_or_none,
+    HEADRequest,
     int_or_none,
     RegexNotFoundError,
     sanitize_filename,
@@ -716,6 +718,27 @@ class InfoExtractor(object):
             )
         formats.sort(key=_formats_key)
 
+    def _check_formats(self, formats, video_id):
+        if formats:
+            formats[:] = filter(
+                lambda f: self._is_valid_url(
+                    f['url'], video_id,
+                    item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
+                formats)
+
+    def _is_valid_url(self, url, video_id, item='video'):
+        try:
+            self._request_webpage(
+                HEADRequest(url), video_id,
+                'Checking %s URL' % item)
+            return True
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                self.report_warning(
+                    '%s URL is invalid, skipping' % item, video_id)
+                return False
+            raise
+
     def http_scheme(self):
         """ Either "http:" or "https:", depending on the user's preferences """
         return (

+ 8 - 2
youtube_dl/extractor/drtv.py

@@ -48,14 +48,20 @@ class DRTVIE(SubtitlesInfoExtractor):
             elif asset['Kind'] == 'VideoResource':
                 duration = asset['DurationInMilliseconds'] / 1000.0
                 restricted_to_denmark = asset['RestrictedToDenmark']
+                spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
                 for link in asset['Links']:
                     target = link['Target']
                     uri = link['Uri']
+                    format_id = target
+                    preference = -1 if target == 'HDS' else -2
+                    if spoken_subtitles:
+                        preference -= 2
+                        format_id += '-spoken-subtitles'
                     formats.append({
                         'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
-                        'format_id': target,
+                        'format_id': format_id,
                         'ext': link['FileFormat'],
-                        'preference': -1 if target == 'HDS' else -2,
+                        'preference': preference,
                     })
                 subtitles_list = asset.get('SubtitlesList')
                 if isinstance(subtitles_list, list):

+ 1 - 0
youtube_dl/extractor/lynda.py

@@ -85,6 +85,7 @@ class LyndaIE(SubtitlesInfoExtractor):
                 } for format_id, video_url in prioritized_streams['0'].items()
             ])
 
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         if self._downloader.params.get('listsubtitles', False):

+ 2 - 18
youtube_dl/extractor/spiegel.py

@@ -4,14 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-    compat_HTTPError,
-)
-from ..utils import (
-    HEADRequest,
-    ExtractorError,
-)
+from ..compat import compat_urlparse
 from .spiegeltv import SpiegeltvIE
 
 
@@ -72,16 +65,6 @@ class SpiegelIE(InfoExtractor):
             if n.tag.startswith('type') and n.tag != 'type6':
                 format_id = n.tag.rpartition('type')[2]
                 video_url = base_url + n.find('./filename').text
-                # Test video URLs beforehand as some of them are invalid
-                try:
-                    self._request_webpage(
-                        HEADRequest(video_url), video_id,
-                        'Checking %s video URL' % format_id)
-                except ExtractorError as e:
-                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
-                        self.report_warning(
-                            '%s video URL is invalid, skipping' % format_id, video_id)
-                        continue
                 formats.append({
                     'format_id': format_id,
                     'url': video_url,
@@ -94,6 +77,7 @@ class SpiegelIE(InfoExtractor):
                 })
         duration = float(idoc[0].findall('./duration')[0].text)
 
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         return {