Selaa lähdekoodia

Improve URL extraction

Sergey M․ 7 vuotta sitten
vanhempi
sitoutus
3052a30d42
47 muutettua tiedostoa, joissa 166 lisäystä ja 139 poistoa
  1. 2 1
      youtube_dl/extractor/adultswim.py
  2. 2 1
      youtube_dl/extractor/afreecatv.py
  3. 8 7
      youtube_dl/extractor/amp.py
  4. 2 1
      youtube_dl/extractor/animeondemand.py
  5. 2 1
      youtube_dl/extractor/aol.py
  6. 3 3
      youtube_dl/extractor/apa.py
  7. 2 1
      youtube_dl/extractor/aparat.py
  8. 2 2
      youtube_dl/extractor/ard.py
  9. 4 3
      youtube_dl/extractor/bandcamp.py
  10. 6 4
      youtube_dl/extractor/breakcom.py
  11. 3 3
      youtube_dl/extractor/cammodels.py
  12. 3 3
      youtube_dl/extractor/ccma.py
  13. 6 8
      youtube_dl/extractor/crackle.py
  14. 4 3
      youtube_dl/extractor/dctp.py
  15. 3 4
      youtube_dl/extractor/discoverygo.py
  16. 5 5
      youtube_dl/extractor/dramafever.py
  17. 3 5
      youtube_dl/extractor/eagleplatform.py
  18. 5 3
      youtube_dl/extractor/egghead.py
  19. 3 2
      youtube_dl/extractor/eporner.py
  20. 3 2
      youtube_dl/extractor/firsttv.py
  21. 4 4
      youtube_dl/extractor/francetv.py
  22. 2 1
      youtube_dl/extractor/frontendmasters.py
  23. 3 2
      youtube_dl/extractor/generic.py
  24. 5 5
      youtube_dl/extractor/hidive.py
  25. 3 3
      youtube_dl/extractor/imdb.py
  26. 2 1
      youtube_dl/extractor/instagram.py
  27. 3 2
      youtube_dl/extractor/itv.py
  28. 4 5
      youtube_dl/extractor/keezmovies.py
  29. 3 2
      youtube_dl/extractor/konserthusetplay.py
  30. 3 2
      youtube_dl/extractor/mediasite.py
  31. 3 2
      youtube_dl/extractor/peertube.py
  32. 3 3
      youtube_dl/extractor/redtube.py
  33. 3 2
      youtube_dl/extractor/rentv.py
  34. 3 2
      youtube_dl/extractor/rutube.py
  35. 3 2
      youtube_dl/extractor/turner.py
  36. 3 4
      youtube_dl/extractor/tvnet.py
  37. 3 1
      youtube_dl/extractor/tvplay.py
  38. 3 2
      youtube_dl/extractor/twitch.py
  39. 7 6
      youtube_dl/extractor/udemy.py
  40. 4 6
      youtube_dl/extractor/vidme.py
  41. 3 1
      youtube_dl/extractor/vk.py
  42. 5 2
      youtube_dl/extractor/xhamster.py
  43. 4 4
      youtube_dl/extractor/yapfiles.py
  44. 3 3
      youtube_dl/extractor/youjizz.py
  45. 3 3
      youtube_dl/extractor/youporn.py
  46. 3 2
      youtube_dl/extractor/zattoo.py
  47. 7 5
      youtube_dl/extractor/zdf.py

+ 2 - 1
youtube_dl/extractor/adultswim.py

@@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
 from ..utils import (
     int_or_none,
     strip_or_none,
+    url_or_none,
 )
 
 
@@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
             if not video_id:
                 entries = []
                 for episode in video_data.get('archiveEpisodes', []):
-                    episode_url = episode.get('url')
+                    episode_url = url_or_none(episode.get('url'))
                     if not episode_url:
                         continue
                     entries.append(self.url_result(

+ 2 - 1
youtube_dl/extractor/afreecatv.py

@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
+    url_or_none,
     urlencode_postdata,
     xpath_text,
 )
@@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
             file_elements = video_element.findall(compat_xpath('./file'))
             one = len(file_elements) == 1
             for file_num, file_element in enumerate(file_elements, start=1):
-                file_url = file_element.text
+                file_url = url_or_none(file_element.text)
                 if not file_url:
                     continue
                 key = file_element.get('key', '')

+ 8 - 7
youtube_dl/extractor/amp.py

@@ -3,11 +3,12 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
-    parse_iso8601,
-    mimetype2ext,
     determine_ext,
     ExtractorError,
+    int_or_none,
+    mimetype2ext,
+    parse_iso8601,
+    url_or_none,
 )
 
 
@@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
                 media_thumbnail = [media_thumbnail]
             for thumbnail_data in media_thumbnail:
                 thumbnail = thumbnail_data.get('@attributes', {})
-                thumbnail_url = thumbnail.get('url')
+                thumbnail_url = url_or_none(thumbnail.get('url'))
                 if not thumbnail_url:
                     continue
                 thumbnails.append({
@@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
                 media_subtitle = [media_subtitle]
             for subtitle_data in media_subtitle:
                 subtitle = subtitle_data.get('@attributes', {})
-                subtitle_href = subtitle.get('href')
+                subtitle_href = url_or_none(subtitle.get('href'))
                 if not subtitle_href:
                     continue
                 subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
@@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
             media_content = [media_content]
         for media_data in media_content:
             media = media_data.get('@attributes', {})
-            media_url = media.get('url')
+            media_url = url_or_none(media.get('url'))
             if not media_url:
                 continue
             ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
@@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
             else:
                 formats.append({
                     'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
-                    'url': media['url'],
+                    'url': media_url,
                     'tbr': int_or_none(media.get('bitrate')),
                     'filesize': int_or_none(media.get('fileSize')),
                     'ext': ext,

+ 2 - 1
youtube_dl/extractor/animeondemand.py

@@ -8,6 +8,7 @@ from ..utils import (
     determine_ext,
     extract_attributes,
     ExtractorError,
+    url_or_none,
     urlencode_postdata,
     urljoin,
 )
@@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
                         }, fatal=False)
                     if not playlist:
                         continue
-                    stream_url = playlist.get('streamurl')
+                    stream_url = url_or_none(playlist.get('streamurl'))
                     if stream_url:
                         rtmp = re.search(
                             r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',

+ 2 - 1
youtube_dl/extractor/aol.py

@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
         for rendition in video_data.get('renditions', []):
-            video_url = rendition.get('url')
+            video_url = url_or_none(rendition.get('url'))
             if not video_url:
                 continue
             ext = rendition.get('format')

+ 3 - 3
youtube_dl/extractor/apa.py

@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     js_to_json,
+    url_or_none,
 )
 
 
@@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
         for source in sources:
             if not isinstance(source, dict):
                 continue
-            source_url = source.get('file')
-            if not source_url or not isinstance(source_url, compat_str):
+            source_url = url_or_none(source.get('file'))
+            if not source_url:
                 continue
             ext = determine_ext(source_url)
             if ext == 'm3u8':

+ 2 - 1
youtube_dl/extractor/aparat.py

@@ -5,6 +5,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     mimetype2ext,
+    url_or_none,
 )
 
 
@@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
 
         formats = []
         for item in file_list[0]:
-            file_url = item.get('file')
+            file_url = url_or_none(item.get('file'))
             if not file_url:
                 continue
             ext = mimetype2ext(item.get('type'))

+ 2 - 2
youtube_dl/extractor/ard.py

@@ -5,7 +5,6 @@ import re
 
 from .common import InfoExtractor
 from .generic import GenericIE
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -15,6 +14,7 @@ from ..utils import (
     unified_strdate,
     xpath_text,
     update_url_query,
+    url_or_none,
 )
 from ..compat import compat_etree_fromstring
 
@@ -100,7 +100,7 @@ class ARDMediathekIE(InfoExtractor):
                 quality = stream.get('_quality')
                 server = stream.get('_server')
                 for stream_url in stream_urls:
-                    if not isinstance(stream_url, compat_str) or '//' not in stream_url:
+                    if not url_or_none(stream_url):
                         continue
                     ext = determine_ext(stream_url)
                     if quality != 'auto' and ext in ('f4m', 'm3u8'):

+ 4 - 3
youtube_dl/extractor/bandcamp.py

@@ -19,6 +19,7 @@ from ..utils import (
     unescapeHTML,
     update_url_query,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -131,8 +132,8 @@ class BandcampIE(InfoExtractor):
                 fatal=False)
             if not stat:
                 continue
-            retry_url = stat.get('retry_url')
-            if not isinstance(retry_url, compat_str):
+            retry_url = url_or_none(stat.get('retry_url'))
+            if not retry_url:
                 continue
             formats.append({
                 'url': self._proto_relative_url(retry_url, 'http:'),
@@ -306,7 +307,7 @@ class BandcampWeeklyIE(InfoExtractor):
 
         formats = []
         for format_id, format_url in show['audio_stream'].items():
-            if not isinstance(format_url, compat_str):
+            if not url_or_none(format_url):
                 continue
             for known_ext in KNOWN_EXTENSIONS:
                 if known_ext in format_id:

+ 6 - 4
youtube_dl/extractor/breakcom.py

@@ -4,8 +4,10 @@ import re
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..compat import compat_str
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    url_or_none,
+)
 
 
 class BreakIE(InfoExtractor):
@@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
 
         formats = []
         for video in content:
-            video_url = video.get('url')
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(video.get('url'))
+            if not video_url:
                 continue
             bitrate = int_or_none(self._search_regex(
                 r'(\d+)_kbps', video_url, 'tbr', default=None))

+ 3 - 3
youtube_dl/extractor/cammodels.py

@@ -2,10 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -56,8 +56,8 @@ class CamModelsIE(InfoExtractor):
             for media in encodings:
                 if not isinstance(media, dict):
                     continue
-                media_url = media.get('location')
-                if not media_url or not isinstance(media_url, compat_str):
+                media_url = url_or_none(media.get('location'))
+                if not media_url:
                     continue
 
                 format_id_list = [format_id]

+ 3 - 3
youtube_dl/extractor/ccma.py

@@ -4,13 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     clean_html,
     int_or_none,
     parse_duration,
     parse_iso8601,
     parse_resolution,
+    url_or_none,
 )
 
 
@@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
         media_url = media['media']['url']
         if isinstance(media_url, list):
             for format_ in media_url:
-                format_url = format_.get('file')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(format_.get('file'))
+                if not format_url:
                     continue
                 label = format_.get('label')
                 f = parse_resolution(label)

+ 6 - 8
youtube_dl/extractor/crackle.py

@@ -4,16 +4,14 @@ from __future__ import unicode_literals, division
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_HTTPError,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
     parse_age_limit,
     parse_duration,
+    url_or_none,
     ExtractorError
 )
 
@@ -86,8 +84,8 @@ class CrackleIE(InfoExtractor):
             for e in media['MediaURLs']:
                 if e.get('UseDRM') is True:
                     continue
-                format_url = e.get('Path')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(e.get('Path'))
+                if not format_url:
                     continue
                 ext = determine_ext(format_url)
                 if ext == 'm3u8':
@@ -124,8 +122,8 @@ class CrackleIE(InfoExtractor):
                 for cc_file in cc_files:
                     if not isinstance(cc_file, dict):
                         continue
-                    cc_url = cc_file.get('Path')
-                    if not cc_url or not isinstance(cc_url, compat_str):
+                    cc_url = url_or_none(cc_file.get('Path'))
+                    if not cc_url:
                         continue
                     lang = cc_file.get('Locale') or 'en'
                     subtitles.setdefault(lang, []).append({'url': cc_url})

+ 4 - 3
youtube_dl/extractor/dctp.py

@@ -7,6 +7,7 @@ from ..utils import (
     float_or_none,
     int_or_none,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -69,7 +70,7 @@ class DctpTvIE(InfoExtractor):
             endpoint = next(
                 server['endpoint']
                 for server in servers
-                if isinstance(server.get('endpoint'), compat_str) and
+                if url_or_none(server.get('endpoint')) and
                 'cloudfront' in server['endpoint'])
         else:
             endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
@@ -92,8 +93,8 @@ class DctpTvIE(InfoExtractor):
             for image in images:
                 if not isinstance(image, dict):
                     continue
-                image_url = image.get('url')
-                if not image_url or not isinstance(image_url, compat_str):
+                image_url = url_or_none(image.get('url'))
+                if not image_url:
                     continue
                 thumbnails.append({
                     'url': image_url,

+ 3 - 4
youtube_dl/extractor/discoverygo.py

@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     extract_attributes,
@@ -12,6 +11,7 @@ from ..utils import (
     parse_age_limit,
     remove_end,
     unescapeHTML,
+    url_or_none,
 )
 
 
@@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
         captions = stream.get('captions')
         if isinstance(captions, list):
             for caption in captions:
-                subtitle_url = caption.get('fileUrl')
-                if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
-                        not subtitle_url.startswith('http')):
+                subtitle_url = url_or_none(caption.get('fileUrl'))
+                if not subtitle_url or not subtitle_url.startswith('http'):
                     continue
                 lang = caption.get('fileLang', 'en')
                 ext = determine_ext(subtitle_url)

+ 5 - 5
youtube_dl/extractor/dramafever.py

@@ -7,7 +7,6 @@ import json
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    compat_str,
     compat_urlparse,
 )
 from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
     parse_age_limit,
     parse_duration,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
         for sub in subs:
             if not isinstance(sub, dict):
                 continue
-            sub_url = sub.get('url')
-            if not sub_url or not isinstance(sub_url, compat_str):
+            sub_url = url_or_none(sub.get('url'))
+            if not sub_url:
                 continue
             subtitles.setdefault(
                 sub.get('code') or sub.get('language') or 'en', []).append({
@@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
             for format_id, format_dict in download_assets.items():
                 if not isinstance(format_dict, dict):
                     continue
-                format_url = format_dict.get('url')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(format_dict.get('url'))
+                if not format_url:
                     continue
                 formats.append({
                     'url': format_url,

+ 3 - 5
youtube_dl/extractor/eagleplatform.py

@@ -4,14 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_str,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
     unsmuggle_url,
+    url_or_none,
 )
 
 
@@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
             video_id, 'Downloading mp4 JSON', fatal=False)
         if mp4_data:
             for format_id, format_url in mp4_data.get('data', {}).items():
-                if not isinstance(format_url, compat_str):
+                if not url_or_none(format_url):
                     continue
                 height = int_or_none(format_id)
                 if height is not None and m3u8_formats_dict.get(height):

+ 5 - 3
youtube_dl/extractor/egghead.py

@@ -8,6 +8,7 @@ from ..utils import (
     int_or_none,
     try_get,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
 
         entries = []
         for lesson in lessons:
-            lesson_url = lesson.get('http_url')
-            if not lesson_url or not isinstance(lesson_url, compat_str):
+            lesson_url = url_or_none(lesson.get('http_url'))
+            if not lesson_url:
                 continue
             lesson_id = lesson.get('id')
             if lesson_id:
@@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
 
         formats = []
         for _, format_url in lesson['media_urls'].items():
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(format_url)
+            if not format_url:
                 continue
             ext = determine_ext(format_url)
             if ext == 'm3u8':

+ 3 - 2
youtube_dl/extractor/eporner.py

@@ -11,6 +11,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     str_to_int,
+    url_or_none,
 )
 
 
@@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
             for format_id, format_dict in formats_dict.items():
                 if not isinstance(format_dict, dict):
                     continue
-                src = format_dict.get('src')
-                if not isinstance(src, compat_str) or not src.startswith('http'):
+                src = url_or_none(format_dict.get('src'))
+                if not src or not src.startswith('http'):
                     continue
                 if kind == 'hls':
                     formats.extend(self._extract_m3u8_formats(

+ 3 - 2
youtube_dl/extractor/firsttv.py

@@ -10,6 +10,7 @@ from ..utils import (
     int_or_none,
     qualities,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
             formats = []
             path = None
             for f in item.get('mbr', []):
-                src = f.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(f.get('src'))
+                if not src:
                     continue
                 tbr = int_or_none(self._search_regex(
                     r'_(\d{3,})\.mp4', src, 'tbr', default=None))

+ 4 - 4
youtube_dl/extractor/francetv.py

@@ -16,6 +16,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     try_get,
+    url_or_none,
 )
 from .dailymotion import DailymotionIE
 
@@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
 
         def sign(manifest_url, manifest_id):
             for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
-                signed_url = self._download_webpage(
+                signed_url = url_or_none(self._download_webpage(
                     'https://%s/esi/TA' % host, video_id,
                     'Downloading signed %s manifest URL' % manifest_id,
                     fatal=False, query={
                         'url': manifest_url,
-                    })
-                if (signed_url and isinstance(signed_url, compat_str) and
-                        re.search(r'^(?:https?:)?//', signed_url)):
+                    }))
+                if signed_url:
                     return signed_url
             return manifest_url
 

+ 2 - 1
youtube_dl/extractor/frontendmasters.py

@@ -11,6 +11,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     parse_duration,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -80,7 +81,7 @@ class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
         chapters = []
         lesson_elements = course.get('lessonElements')
         if isinstance(lesson_elements, list):
-            chapters = [e for e in lesson_elements if isinstance(e, compat_str)]
+            chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
         return chapters
 
     @staticmethod

+ 3 - 2
youtube_dl/extractor/generic.py

@@ -32,6 +32,7 @@ from ..utils import (
     unified_strdate,
     unsmuggle_url,
     UnsupportedError,
+    url_or_none,
     xpath_text,
 )
 from .commonprotocols import RtmpIE
@@ -3130,8 +3131,8 @@ class GenericIE(InfoExtractor):
                 sources = [sources]
             formats = []
             for source in sources:
-                src = source.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(source.get('src'))
+                if not src:
                     continue
                 src = compat_urlparse.urljoin(url, src)
                 src_type = source.get('type')

+ 5 - 5
youtube_dl/extractor/hidive.py

@@ -8,6 +8,7 @@ from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
             bitrates = rendition.get('bitrates')
             if not isinstance(bitrates, dict):
                 continue
-            m3u8_url = bitrates.get('hls')
-            if not isinstance(m3u8_url, compat_str):
+            m3u8_url = url_or_none(bitrates.get('hls'))
+            if not m3u8_url:
                 continue
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
@@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
                 if not isinstance(cc_file, list) or len(cc_file) < 3:
                     continue
                 cc_lang = cc_file[0]
-                cc_url = cc_file[2]
-                if not isinstance(cc_lang, compat_str) or not isinstance(
-                        cc_url, compat_str):
+                cc_url = url_or_none(cc_file[2])
+                if not isinstance(cc_lang, compat_str) or not cc_url:
                     continue
                 subtitles.setdefault(cc_lang, []).append({
                     'url': cc_url,

+ 3 - 3
youtube_dl/extractor/imdb.py

@@ -3,12 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     mimetype2ext,
     parse_duration,
     qualities,
+    url_or_none,
 )
 
 
@@ -61,8 +61,8 @@ class ImdbIE(InfoExtractor):
         for encoding in video_metadata.get('encodings', []):
             if not encoding or not isinstance(encoding, dict):
                 continue
-            video_url = encoding.get('videoUrl')
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(encoding.get('videoUrl'))
+            if not video_url:
                 continue
             ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
             if ext == 'm3u8':

+ 2 - 1
youtube_dl/extractor/instagram.py

@@ -17,6 +17,7 @@ from ..utils import (
     lowercase_escape,
     std_headers,
     try_get,
+    url_or_none,
 )
 
 
@@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
                             node = try_get(edge, lambda x: x['node'], dict)
                             if not node:
                                 continue
-                            node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
+                            node_video_url = url_or_none(node.get('video_url'))
                             if not node_video_url:
                                 continue
                             entries.append({

+ 3 - 2
youtube_dl/extractor/itv.py

@@ -20,6 +20,7 @@ from ..utils import (
     merge_dicts,
     parse_duration,
     smuggle_url,
+    url_or_none,
     xpath_with_ns,
     xpath_element,
     xpath_text,
@@ -250,8 +251,8 @@ class ITVIE(InfoExtractor):
                     for sub in subs:
                         if not isinstance(sub, dict):
                             continue
-                        href = sub.get('Href')
-                        if isinstance(href, compat_str):
+                        href = url_or_none(sub.get('Href'))
+                        if href:
                             extract_subtitle(href)
                 if not info.get('duration'):
                     info['duration'] = parse_duration(video_data.get('Duration'))

+ 4 - 5
youtube_dl/extractor/keezmovies.py

@@ -4,16 +4,14 @@ import re
 
 from .common import InfoExtractor
 from ..aes import aes_decrypt_text
-from ..compat import (
-    compat_str,
-    compat_urllib_parse_unquote,
-)
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
     str_to_int,
     strip_or_none,
+    url_or_none,
 )
 
 
@@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
         encrypted = False
 
         def extract_format(format_url, height=None):
-            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
+            format_url = url_or_none(format_url)
+            if not format_url or not format_url.startswith(('http', '//')):
                 return
             if format_url in format_urls:
                 return

+ 3 - 2
youtube_dl/extractor/konserthusetplay.py

@@ -2,11 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
         captions = source.get('captionsAvailableLanguages')
         if isinstance(captions, dict):
             for lang, subtitle_url in captions.items():
-                if lang != 'none' and isinstance(subtitle_url, compat_str):
+                subtitle_url = url_or_none(subtitle_url)
+                if lang != 'none' and subtitle_url:
                     subtitles.setdefault(lang, []).append({'url': subtitle_url})
 
         return {

+ 3 - 2
youtube_dl/extractor/mediasite.py

@@ -15,6 +15,7 @@ from ..utils import (
     mimetype2ext,
     unescapeHTML,
     unsmuggle_url,
+    url_or_none,
     urljoin,
 )
 
@@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):
 
             stream_formats = []
             for unum, VideoUrl in enumerate(video_urls):
-                video_url = VideoUrl.get('Location')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(VideoUrl.get('Location'))
+                if not video_url:
                     continue
                 # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
 

+ 3 - 2
youtube_dl/extractor/peertube.py

@@ -10,6 +10,7 @@ from ..utils import (
     parse_resolution,
     try_get,
     unified_timestamp,
+    url_or_none,
     urljoin,
 )
 
@@ -200,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
         for file_ in video['files']:
             if not isinstance(file_, dict):
                 continue
-            file_url = file_.get('fileUrl')
-            if not file_url or not isinstance(file_url, compat_str):
+            file_url = url_or_none(file_.get('fileUrl'))
+            if not file_url:
                 continue
             file_size = int_or_none(file_.get('size'))
             format_id = try_get(

+ 3 - 3
youtube_dl/extractor/redtube.py

@@ -3,12 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
     str_to_int,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -71,8 +71,8 @@ class RedTubeIE(InfoExtractor):
             video_id, fatal=False)
         if medias and isinstance(medias, list):
             for media in medias:
-                format_url = media.get('videoUrl')
-                if not format_url or not isinstance(format_url, compat_str):
+                format_url = url_or_none(media.get('videoUrl'))
+                if not format_url:
                     continue
                 format_id = media.get('quality')
                 formats.append({

+ 3 - 2
youtube_dl/extractor/rentv.py

@@ -6,6 +6,7 @@ from ..compat import compat_str
 from ..utils import (
     determine_ext,
     int_or_none,
+    url_or_none,
 )
 
 
@@ -37,8 +38,8 @@ class RENTVIE(InfoExtractor):
         title = config['title']
         formats = []
         for video in config['src']:
-            src = video.get('src')
-            if not src or not isinstance(src, compat_str):
+            src = url_or_none(video.get('src'))
+            if not src:
                 continue
             ext = determine_ext(src)
             if ext == 'm3u8':

+ 3 - 2
youtube_dl/extractor/rutube.py

@@ -16,6 +16,7 @@ from ..utils import (
     int_or_none,
     try_get,
     unified_timestamp,
+    url_or_none,
 )
 
 
@@ -176,8 +177,8 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
                 break
 
             for result in results:
-                video_url = result.get('video_url')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(result.get('video_url'))
+                if not video_url:
                     continue
                 entry = self._extract_video(result, require_title=False)
                 entry.update({

+ 3 - 2
youtube_dl/extractor/turner.py

@@ -15,6 +15,7 @@ from ..utils import (
     update_url_query,
     ExtractorError,
     strip_or_none,
+    url_or_none,
 )
 
 
@@ -154,8 +155,8 @@ class TurnerBaseIE(AdobePassIE):
         subtitles = {}
         for source in video_data.findall('closedCaptions/source'):
             for track in source.findall('track'):
-                track_url = track.get('url')
-                if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
+                track_url = url_or_none(track.get('url'))
+                if not track_url or track_url.endswith('/big'):
                     continue
                 lang = track.get('lang') or track.get('label') or 'en'
                 subtitles.setdefault(lang, []).append({

+ 3 - 4
youtube_dl/extractor/tvnet.py

@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     unescapeHTML,
+    url_or_none,
 )
 
 
@@ -106,9 +106,8 @@ class TVNetIE(InfoExtractor):
         for stream in self._download_json(data_file, video_id):
             if not isinstance(stream, dict):
                 continue
-            stream_url = stream.get('url')
-            if (stream_url in stream_urls or not stream_url or
-                    not isinstance(stream_url, compat_str)):
+            stream_url = url_or_none(stream.get('url'))
+            if stream_url in stream_urls or not stream_url:
                 continue
             stream_urls.add(stream_url)
             formats.extend(self._extract_m3u8_formats(

+ 3 - 1
youtube_dl/extractor/tvplay.py

@@ -19,6 +19,7 @@ from ..utils import (
     try_get,
     unsmuggle_url,
     update_url_query,
+    url_or_none,
 )
 
 
@@ -255,7 +256,8 @@ class TVPlayIE(InfoExtractor):
         quality = qualities(['hls', 'medium', 'high'])
         formats = []
         for format_id, video_url in streams.get('streams', {}).items():
-            if not video_url or not isinstance(video_url, compat_str):
+            video_url = url_or_none(video_url)
+            if not video_url:
                 continue
             ext = determine_ext(video_url)
             if ext == 'f4m':

+ 3 - 2
youtube_dl/extractor/twitch.py

@@ -27,6 +27,7 @@ from ..utils import (
     unified_timestamp,
     update_url_query,
     urlencode_postdata,
+    url_or_none,
     urljoin,
 )
 
@@ -663,8 +664,8 @@ class TwitchClipsIE(TwitchBaseIE):
         for option in status['quality_options']:
             if not isinstance(option, dict):
                 continue
-            source = option.get('source')
-            if not source or not isinstance(source, compat_str):
+            source = url_or_none(option.get('source'))
+            if not source:
                 continue
             formats.append({
                 'url': source,

+ 7 - 6
youtube_dl/extractor/udemy.py

@@ -20,6 +20,7 @@ from ..utils import (
     sanitized_Request,
     try_get,
     unescapeHTML,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -265,8 +266,8 @@ class UdemyIE(InfoExtractor):
             if not isinstance(source_list, list):
                 return
             for source in source_list:
-                video_url = source.get('file') or source.get('src')
-                if not video_url or not isinstance(video_url, compat_str):
+                video_url = url_or_none(source.get('file') or source.get('src'))
+                if not video_url:
                     continue
                 if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
@@ -293,8 +294,8 @@ class UdemyIE(InfoExtractor):
                     continue
                 if track.get('kind') != 'captions':
                     continue
-                src = track.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(track.get('src'))
+                if not src:
                     continue
                 lang = track.get('language') or track.get(
                     'srclang') or track.get('label')
@@ -314,8 +315,8 @@ class UdemyIE(InfoExtractor):
             for cc in captions:
                 if not isinstance(cc, dict):
                     continue
-                cc_url = cc.get('url')
-                if not cc_url or not isinstance(cc_url, compat_str):
+                cc_url = url_or_none(cc.get('url'))
+                if not cc_url:
                     continue
                 lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
                 sub_dict = (automatic_captions if cc.get('source') == 'auto'

+ 4 - 6
youtube_dl/extractor/vidme.py

@@ -3,15 +3,13 @@ from __future__ import unicode_literals
 import itertools
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_str,
-)
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
     float_or_none,
     parse_iso8601,
+    url_or_none,
 )
 
 
@@ -166,8 +164,8 @@ class VidmeIE(InfoExtractor):
 
         formats = []
         for f in video.get('formats', []):
-            format_url = f.get('uri')
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(f.get('uri'))
+            if not format_url:
                 continue
             format_type = f.get('type')
             if format_type == 'dash':

+ 3 - 1
youtube_dl/extractor/vk.py

@@ -20,6 +20,7 @@ from ..utils import (
     str_to_int,
     unescapeHTML,
     unified_timestamp,
+    url_or_none,
     urlencode_postdata,
 )
 from .dailymotion import DailymotionIE
@@ -423,7 +424,8 @@ class VKIE(VKBaseIE):
 
         formats = []
         for format_id, format_url in data.items():
-            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
+            format_url = url_or_none(format_url)
+            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                 continue
             if (format_id.startswith(('url', 'cache')) or
                     format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):

+ 5 - 2
youtube_dl/extractor/xhamster.py

@@ -13,6 +13,7 @@ from ..utils import (
     parse_duration,
     try_get,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -137,7 +138,8 @@ class XHamsterIE(InfoExtractor):
                     else:
                         format_url = format_item
                         filesize = None
-                    if not isinstance(format_url, compat_str):
+                    format_url = url_or_none(format_url)
+                    if not format_url:
                         continue
                     formats.append({
                         'format_id': '%s-%s' % (format_id, quality),
@@ -198,7 +200,8 @@ class XHamsterIE(InfoExtractor):
                 default='{}'),
             video_id, fatal=False)
         for format_id, format_url in sources.items():
-            if not isinstance(format_url, compat_str):
+            format_url = url_or_none(format_url)
+            if not format_url:
                 continue
             if format_url in format_urls:
                 continue

+ 4 - 4
youtube_dl/extractor/yapfiles.py

@@ -4,12 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
     unescapeHTML,
+    url_or_none,
 )
 
 
@@ -80,9 +80,9 @@ class YapFilesIE(InfoExtractor):
         formats = []
         for format_id in QUALITIES:
             is_hd = format_id == 'hd'
-            format_url = playlist.get(
-                'file%s' % ('_hd' if is_hd else ''))
-            if not format_url or not isinstance(format_url, compat_str):
+            format_url = url_or_none(playlist.get(
+                'file%s' % ('_hd' if is_hd else '')))
+            if not format_url:
                 continue
             formats.append({
                 'url': format_url,

+ 3 - 3
youtube_dl/extractor/youjizz.py

@@ -3,11 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     determine_ext,
     int_or_none,
     parse_duration,
+    url_or_none,
 )
 
 
@@ -50,8 +50,8 @@ class YouJizzIE(InfoExtractor):
         for encoding in encodings:
             if not isinstance(encoding, dict):
                 continue
-            format_url = encoding.get('filename')
-            if not isinstance(format_url, compat_str):
+            format_url = url_or_none(encoding.get('filename'))
+            if not format_url:
                 continue
             if determine_ext(format_url) == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(

+ 3 - 3
youtube_dl/extractor/youporn.py

@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     sanitized_Request,
     str_to_int,
     unescapeHTML,
     unified_strdate,
+    url_or_none,
 )
 from ..aes import aes_decrypt_text
 
@@ -88,8 +88,8 @@ class YouPornIE(InfoExtractor):
             for definition in definitions:
                 if not isinstance(definition, dict):
                     continue
-                video_url = definition.get('videoUrl')
-                if isinstance(video_url, compat_str) and video_url:
+                video_url = url_or_none(definition.get('videoUrl'))
+                if video_url:
                     links.append(video_url)
 
         # Fallback #1, this also contains extra low quality 180p format

+ 3 - 2
youtube_dl/extractor/zattoo.py

@@ -13,6 +13,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     try_get,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -150,8 +151,8 @@ class ZattooBaseIE(InfoExtractor):
             for watch in watch_urls:
                 if not isinstance(watch, dict):
                     continue
-                watch_url = watch.get('url')
-                if not watch_url or not isinstance(watch_url, compat_str):
+                watch_url = url_or_none(watch.get('url'))
+                if not watch_url:
                     continue
                 format_id_list = [stream_type]
                 maxrate = watch.get('maxrate')

+ 7 - 5
youtube_dl/extractor/zdf.py

@@ -15,6 +15,7 @@ from ..utils import (
     try_get,
     unified_timestamp,
     update_url_query,
+    url_or_none,
     urljoin,
 )
 
@@ -67,8 +68,8 @@ class ZDFIE(ZDFBaseIE):
     def _extract_subtitles(src):
         subtitles = {}
         for caption in try_get(src, lambda x: x['captions'], list) or []:
-            subtitle_url = caption.get('uri')
-            if subtitle_url and isinstance(subtitle_url, compat_str):
+            subtitle_url = url_or_none(caption.get('uri'))
+            if subtitle_url:
                 lang = caption.get('language', 'deu')
                 subtitles.setdefault(lang, []).append({
                     'url': subtitle_url,
@@ -76,8 +77,8 @@ class ZDFIE(ZDFBaseIE):
         return subtitles
 
     def _extract_format(self, video_id, formats, format_urls, meta):
-        format_url = meta.get('url')
-        if not format_url or not isinstance(format_url, compat_str):
+        format_url = url_or_none(meta.get('url'))
+        if not format_url:
             return
         if format_url in format_urls:
             return
@@ -152,7 +153,8 @@ class ZDFIE(ZDFBaseIE):
             content, lambda x: x['teaserImageRef']['layouts'], dict)
         if layouts:
             for layout_key, layout_url in layouts.items():
-                if not isinstance(layout_url, compat_str):
+                layout_url = url_or_none(layout_url)
+                if not layout_url:
                     continue
                 thumbnail = {
                     'url': layout_url,