2 years ago · b2ba24bb02
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -4,6 +4,7 @@ from inspect import getsource
 
				 import io
			
 
				 import os
			
 
				 from os.path import dirname as dirn
			
 
				+import re
			
 
				 import sys
			
 
				 
			
 
				 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
			
@@ -29,11 +30,18 @@ from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
 
				 with open('devscripts/lazy_load_template.py', 'rt') as f:
			
 
				     module_template = f.read()
			
 
				 
			
 
				+
			
 
				+def get_source(m):
			
 
				+    return re.sub(r'(?m)^\s*#.*\n', '', getsource(m))
			
 
				+
			
 
				+
			
 
				 module_contents = [
			
 
				-    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
			
 
				+    module_template,
			
 
				+    get_source(InfoExtractor.suitable),
			
 
				+    get_source(InfoExtractor._match_valid_url) + '\n',
			
 
				     'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
			
 
				     # needed for suitable() methods of Youtube extractor (see #28780)
			
 
				-    'from youtube_dl.utils import parse_qs\n',
			
 
				+    'from youtube_dl.utils import parse_qs, variadic\n',
			
 
				 ]
			
 
				 
			
 
				 ie_template = '''
			
@@ -66,7 +74,7 @@ def build_lazy_ie(ie, name):
 
				         valid_url=valid_url,
			
 
				         module=ie.__module__)
			
 
				     if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
			
 
				-        s += '\n' + getsource(ie.suitable)
			
 
				+        s += '\n' + get_source(ie.suitable)
			
 
				     if hasattr(ie, '_make_valid_url'):
			
 
				         # search extractors
			
 
				         s += make_valid_template.format(valid_url=ie._make_valid_url())
			
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -83,6 +83,7 @@ from ..utils import (
 
				     urljoin,
			
 
				     url_basename,
			
 
				     url_or_none,
			
 
				+    variadic,
			
 
				     xpath_element,
			
 
				     xpath_text,
			
 
				     xpath_with_ns,
			
@@ -371,9 +372,22 @@ class InfoExtractor(object):
 
				     title, description etc.
			
 
				 
			
 
				 
			
 
				-    Subclasses of this one should re-define the _real_initialize() and
			
 
				-    _real_extract() methods and define a _VALID_URL regexp.
			
 
				-    Probably, they should also be added to the list of extractors.
			
 
				+    A subclass of InfoExtractor must be defined to handle each specific site (or
			
 
				+    several sites). Such a concrete subclass should be added to the list of
			
 
				+    extractors. It should also:
			
 
				+    * define its _VALID_URL attribute as a regexp, or a Sequence of alternative
			
 
				+      regexps (but see below)
			
 
				+    * re-define the _real_extract() method
			
 
				+    * optionally re-define the _real_initialize() method.
			
 
				+
			
 
				+    An extractor subclass may also override suitable() if necessary, but the
			
 
				+    function signature must be preserved and the function must import everything
			
 
				+    it needs (except other extractors), so that lazy_extractors works correctly.
			
 
				+    If the subclass's suitable() and _real_extract() functions avoid using
			
 
				+    _VALID_URL, the subclass need not set that class attribute.
			
 
				+
			
 
				+    An abstract subclass of InfoExtractor may be used to simplify implementation
			
 
				+    within an extractor module; it should not be added to the list of extractors.
			
 
				 
			
 
				     _GEO_BYPASS attribute may be set to False in order to disable
			
 
				     geo restriction bypass mechanisms for a particular extractor.
			
@@ -409,21 +423,32 @@ class InfoExtractor(object):
 
				         self.set_downloader(downloader)
			
 
				 
			
 
				     @classmethod
			
 
				-    def suitable(cls, url):
			
 
				-        """Receives a URL and returns True if suitable for this IE."""
			
 
				-
			
 
				+    def __match_valid_url(cls, url):
			
 
				         # This does not use has/getattr intentionally - we want to know whether
			
 
				-        # we have cached the regexp for *this* class, whereas getattr would also
			
 
				-        # match the superclass
			
 
				+        # we have cached the regexp for cls, whereas getattr would also
			
 
				+        # match its superclass
			
 
				         if '_VALID_URL_RE' not in cls.__dict__:
			
 
				-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
			
 
				-        return cls._VALID_URL_RE.match(url) is not None
			
 
				+            # _VALID_URL can now be a list/tuple of patterns
			
 
				+            cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
			
 
				+        # 20% faster than next(filter(None, (p.match(url) for p in cls._VALID_URL_RE)), None) in 2.7
			
 
				+        for p in cls._VALID_URL_RE:
			
 
				+            p = p.match(url)
			
 
				+            if p:
			
 
				+                return p
			
 
				+
			
 
				+    # The public alias can safely be overridden, as in some back-ports
			
 
				+    _match_valid_url = __match_valid_url
			
 
				+
			
 
				+    @classmethod
			
 
				+    def suitable(cls, url):
			
 
				+        """Receives a URL and returns True if suitable for this IE."""
			
 
				+        # This function must import everything it needs (except other extractors),
			
 
				+        # so that lazy_extractors works correctly
			
 
				+        return cls.__match_valid_url(url) is not None
			
 
				 
			
 
				     @classmethod
			
 
				     def _match_id(cls, url):
			
 
				-        if '_VALID_URL_RE' not in cls.__dict__:
			
 
				-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
			
 
				-        m = cls._VALID_URL_RE.match(url)
			
 
				+        m = cls.__match_valid_url(url)
			
 
				         assert m
			
 
				         return compat_str(m.group('id'))
			
 
				 
			
--- a/youtube_dl/extractor/globalplayer.py
+++ b/youtube_dl/extractor/globalplayer.py
@@ -18,12 +18,6 @@ from ..utils import (
 
				 
			
 
				 class GlobalPlayerBaseIE(InfoExtractor):
			
 
				 
			
 
				-    import re
			
 
				-
			
 
				-    @classmethod
			
 
				-    def _match_valid_url(cls, url):
			
 
				-        return cls.re.match(cls._VALID_URL, url)
			
 
				-
			
 
				     def _get_page_props(self, url, video_id):
			
 
				         webpage = self._download_webpage(url, video_id)
			
 
				         return self._search_nextjs_data(webpage, video_id)['props']['pageProps']