瀏覽代碼

[extractor/common] Add _meta_regex and clarify tags field

Sergey M․ 10 年之前
父節點
當前提交
864f24bd2c
共有 1 個文件被更改,包括 8 次插入和 4 次刪除
  1. 8 4
      youtube_dl/extractor/common.py

+ 8 - 4
youtube_dl/extractor/common.py

@@ -181,13 +181,13 @@ class InfoExtractor(object):
                     by YoutubeDL if it's missing)
     categories:     A list of categories that the video falls in, for example
                     ["Sports", "Berlin"]
+    tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]
     is_live:        True, False, or None (=unknown). Whether this video is a
                     live stream that goes on instead of a fixed-length video.
     start_time:     Time in seconds where the reproduction should start, as
                     specified in the URL.
     end_time:       Time in seconds where the reproduction should end, as
                     specified in the URL.
-    tags:           A list of keywords attached to the video.
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 
@@ -631,6 +631,12 @@ class InfoExtractor(object):
             template % (content_re, property_re),
         ]
 
+    @staticmethod
+    def _meta_regex(prop):
+        return r'''(?isx)<meta
+                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+
     def _og_search_property(self, prop, html, name=None, **kargs):
         if name is None:
             name = 'OpenGraph %s' % prop
@@ -661,9 +667,7 @@ class InfoExtractor(object):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
-            r'''(?isx)<meta
-                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
+            self._meta_regex(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):