浏览代码

[utils] Add `subs_list_to_dict()` traversal helper

Thx: yt-dlp/yt-dlp#10653, etc
dirkf 1 月之前
父节点
当前提交
70b40dd1ef
共有 3 个文件被更改,包括 147 次插入和 0 次删除
  1. 101 0
      test/test_traversal.py
  2. 1 0
      youtube_dl/traversal.py
  3. 45 0
      youtube_dl/utils.py

+ 101 - 0
test/test_traversal.py

@@ -16,6 +16,7 @@ from youtube_dl.traversal import (
     dict_get,
     get_first,
     require,
+    subs_list_to_dict,
     T,
     traverse_obj,
     unpack,
@@ -30,6 +31,7 @@ from youtube_dl.compat import (
     compat_zip as zip,
 )
 from youtube_dl.utils import (
+    determine_ext,
     ExtractorError,
     int_or_none,
     join_nonempty,
@@ -495,6 +497,105 @@ class TestTraversalHelpers(_TestCase):
             traverse_obj(_TEST_DATA, ('str', T(require('value')))), 'str',
             '`require` should pass through non-`None` values')
 
+    def test_subs_list_to_dict(self):  # checks grouping, filtering, defaults and ordering of `subs_list_to_dict`
+        self.assertEqual(traverse_obj([  # case 1: entries sharing an `id` are collected under one key, in input order
+            {'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
+            {'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
+            {'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'url': 'url',
+        }, all, T(subs_list_to_dict)]), {
+            'de': [{'url': 'https://example.com/subs/de.vtt'}],
+            'en': [
+                {'url': 'https://example.com/subs/en1.ass'},
+                {'url': 'https://example.com/subs/en2.ass'},
+            ],
+        }, 'function should build subtitle dict from list of subtitles')
+        self.assertEqual(traverse_obj([  # case 2: with `lang=None`, entries lacking an id or lacking both url and data are dropped
+            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
+            {'name': 'de'},
+            {'name': 'en', 'content': 'content'},
+            {'url': 'https://example.com/subs/en'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'data': 'content',
+            'url': 'url',
+        }, all, T(subs_list_to_dict(lang=None))]), {
+            'de': [{'url': 'https://example.com/subs/de.ass'}],
+            'en': [{'data': 'content'}],
+        }, 'subs with mandatory items missing should be filtered')
+        self.assertEqual(traverse_obj([  # case 3: `ext='ext'` is only filled in where the entry has no ext of its own
+            {'url': 'https://example.com/subs/de.ass', 'name': 'de'},
+            {'url': 'https://example.com/subs/en', 'name': 'en'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'ext': ['url', T(determine_ext(default_ext=None))],
+            'url': 'url',
+        }, all, T(subs_list_to_dict(ext='ext'))]), {
+            'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
+            'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
+        }, '`ext` should set default ext but leave existing value untouched')
+        self.assertEqual(traverse_obj([  # case 4: `quality` (here int(prio)) sorts ascending and is absent from the output
+            {'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
+            {'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
+        ], [Ellipsis, {
+            'id': 'name',
+            'quality': ['prio', T(int)],
+            'url': 'url',
+        }, all, T(subs_list_to_dict(ext='ext'))]), {'en': [
+            {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
+            {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
+        ]}, '`quality` key should sort subtitle list accordingly')
+        self.assertEqual(traverse_obj([  # case 5: with `lang='en'`, the entry without an id is filed under 'en'
+            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
+            {'name': 'de'},
+            {'name': 'en', 'content': 'content'},
+            {'url': 'https://example.com/subs/en'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'url': 'url',
+            'data': 'content',
+        }, all, T(subs_list_to_dict(lang='en'))]), {
+            'de': [{'url': 'https://example.com/subs/de.ass'}],
+            'en': [
+                {'data': 'content'},
+                {'url': 'https://example.com/subs/en'},
+            ],
+        }, 'optionally provided lang should be used if no id available')
+        self.assertEqual(traverse_obj([  # case 6: with `lang=None`, non-str ids drop the entry and non-str ext values are removed
+            {'name': 1, 'url': 'https://example.com/subs/de1'},
+            {'name': {}, 'url': 'https://example.com/subs/de2'},
+            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
+            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'url': 'url',
+            'ext': 'ext',
+        }, all, T(subs_list_to_dict(lang=None))]), {
+            'de': [
+                {'url': 'https://example.com/subs/de3'},
+                {'url': 'https://example.com/subs/de4'},
+            ],
+        }, 'non str types should be ignored for id and ext')
+        self.assertEqual(traverse_obj([  # case 7: same input as case 6, but `lang='de'` keeps the non-str-id entries under 'de'
+            {'name': 1, 'url': 'https://example.com/subs/de1'},
+            {'name': {}, 'url': 'https://example.com/subs/de2'},
+            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
+            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'url': 'url',
+            'ext': 'ext',
+        }, all, T(subs_list_to_dict(lang='de'))]), {
+            'de': [
+                {'url': 'https://example.com/subs/de1'},
+                {'url': 'https://example.com/subs/de2'},
+                {'url': 'https://example.com/subs/de3'},
+                {'url': 'https://example.com/subs/de4'},
+            ],
+        }, 'non str types should be replaced by default id')
+
     def test_unpack(self):
         self.assertEqual(
             unpack(lambda *x: ''.join(map(compat_str, x)))([1, 2, 3]), '123')

+ 1 - 0
youtube_dl/traversal.py

@@ -6,6 +6,7 @@ from .utils import (
     dict_get,
     get_first,
     require,
+    subs_list_to_dict,
     T,
     traverse_obj,
     unpack,

+ 45 - 0
youtube_dl/utils.py

@@ -6599,6 +6599,51 @@ class require(ExtractorError):
         return value
 
 
+@partial_application
+# typing: (subs: list[dict], /, *, lang='und', ext=None) -> dict[str, list[dict]
+def subs_list_to_dict(subs, lang='und', ext=None):
+    """
+    Convert subtitles from a traversal into a subtitle dict.
+    The path should have an `all` immediately before this function.
+
+    Arguments:
+    `lang`     The default language tag for subtitle dicts with no
+               `lang` (`und`: undefined)
+    `ext`      The default value for `ext` in the subtitle dicts
+
+    In the dict you can set the following additional items:
+    `id`       The language tag to which the subtitle dict should be added
+    `quality`  The sort order for each subtitle dict
+    """
+
+    result = collections.defaultdict(list)
+
+    for sub in subs:
+        tn_url = url_or_none(sub.pop('url', None))
+        if tn_url:
+            sub['url'] = tn_url
+        elif not sub.get('data'):
+            continue
+        sub_lang = sub.pop('id', None)
+        if not isinstance(sub_lang, compat_str):
+            if not lang:
+                continue
+            sub_lang = lang
+        sub_ext = sub.get('ext')
+        if not isinstance(sub_ext, compat_str):
+            if not ext:
+                sub.pop('ext', None)
+            else:
+                sub['ext'] = ext
+        result[sub_lang].append(sub)
+    result = dict(result)
+
+    for subs in result.values():
+        subs.sort(key=lambda x: x.pop('quality', 0) or 0)
+
+    return result
+
+
 def unpack(func, **kwargs):
     """Make a function that applies `partial(func, **kwargs)` to its argument as *args"""
     @functools.wraps(func)