Browse Source

[lecturio] Improve subtitles extraction (closes #18488)

Sergey M․ 6 years ago
parent
commit
0a05cfabb6
1 changed file with 8 additions and 2 deletions
  1. 8 2
      youtube_dl/extractor/lecturio.py

+ 8 - 2
youtube_dl/extractor/lecturio.py

@@ -136,9 +136,15 @@ class LecturioIE(LecturioBaseIE):
             cc_url = url_or_none(cc_url)
             cc_url = url_or_none(cc_url)
             if not cc_url:
             if not cc_url:
                 continue
                 continue
-            sub_dict = automatic_captions if 'auto-translated' in cc_label else subtitles
             lang = self._search_regex(
             lang = self._search_regex(
-                r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0])
+                r'/([a-z]{2})_', cc_url, 'lang',
+                default=cc_label.split()[0] if cc_label else 'en')
+            original_lang = self._search_regex(
+                r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
+                default=None)
+            sub_dict = (automatic_captions
+                        if 'auto-translated' in cc_label or original_lang
+                        else subtitles)
             sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
             sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
                 'url': cc_url,
                 'url': cc_url,
             })
             })