Browse Source

Allow specifying multiple subtitle languages separated by commas (closes #518)

Jaime Marquínez Ferrándiz 12 years ago
parent
commit
aa6a10c44a
4 changed files with 36 additions and 20 deletions
  1. 11 2
      test/test_youtube_subtitles.py
  2. 1 1
      youtube_dl/YoutubeDL.py
  3. 8 4
      youtube_dl/__init__.py
  4. 16 13
      youtube_dl/extractor/youtube.py

+ 11 - 2
test/test_youtube_subtitles.py

@@ -40,7 +40,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
     def test_youtube_subtitles_it(self):
         DL = FakeYDL()
         DL.params['writesubtitles'] = True
-        DL.params['subtitleslang'] = 'it'
+        DL.params['subtitleslangs'] = ['it']
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
         sub = info_dict[0]['subtitles']['it']
@@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase):
     def test_youtube_automatic_captions(self):
         DL = FakeYDL()
         DL.params['writeautomaticsub'] = True
-        DL.params['subtitleslang'] = 'it'
+        DL.params['subtitleslangs'] = ['it']
         IE = YoutubeIE(DL)
         info_dict = IE.extract('8YoUxe5ncPo')
         sub = info_dict[0]['subtitles']['it']
         self.assertTrue(sub is not None)
+    def test_youtube_multiple_langs(self):
+        DL = FakeYDL()
+        DL.params['writesubtitles'] = True
+        langs = ['it', 'fr', 'de']
+        DL.params['subtitleslangs'] = langs
+        IE = YoutubeIE(DL)
+        subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
 
 if __name__ == '__main__':
     unittest.main()

+ 1 - 1
youtube_dl/YoutubeDL.py

@@ -76,7 +76,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
-    subtitleslang:     Language of the subtitles to download
+    subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
     skip_download:     Skip the actual download of the video file

+ 8 - 4
youtube_dl/__init__.py

@@ -83,6 +83,9 @@ def parseOpts(overrideArguments=None):
 
         return "".join(opts)
 
+    def _comma_separated_values_options_callback(option, opt_str, value, parser):
+        setattr(parser.values, option.dest, value.split(','))
+
     def _find_term_columns():
         columns = os.environ.get('COLUMNS', None)
         if columns:
@@ -206,9 +209,10 @@ def parseOpts(overrideArguments=None):
     subtitles.add_option('--sub-format',
             action='store', dest='subtitlesformat', metavar='FORMAT',
             help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
-    subtitles.add_option('--sub-lang', '--srt-lang',
-            action='store', dest='subtitleslang', metavar='LANG',
-            help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
+    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
+            action='callback', dest='subtitleslang', metavar='LANGS', type='str',
+            default=[], callback=_comma_separated_values_options_callback,
+            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
 
     downloader.add_option('-r', '--rate-limit',
             dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
@@ -573,7 +577,7 @@ def _real_main(argv=None):
         'allsubtitles': opts.allsubtitles,
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
-        'subtitleslang': opts.subtitleslang,
+        'subtitleslangs': opts.subtitleslang,
         'matchtitle': decodeOption(opts.matchtitle),
         'rejecttitle': decodeOption(opts.rejecttitle),
         'max_downloads': opts.max_downloads,

+ 16 - 13
youtube_dl/extractor/youtube.py

@@ -496,7 +496,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _request_automatic_caption(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
+        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
         sub_format = self._downloader.params.get('subtitlesformat')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
@@ -530,23 +530,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         Return a dictionary: {language: subtitles} or {} if the subtitles
         couldn't be found
         """
-        sub_lang_list = self._get_available_subtitles(video_id)
+        available_subs_list = self._get_available_subtitles(video_id)
         sub_format = self._downloader.params.get('subtitlesformat')
-        if  not sub_lang_list: #There was some error, it didn't get the available subtitles
+        if  not available_subs_list: #There was some error, it didn't get the available subtitles
             return {}
         if self._downloader.params.get('allsubtitles', False):
-            pass
+            sub_lang_list = available_subs_list
         else:
-            if self._downloader.params.get('subtitleslang', False):
-                sub_lang = self._downloader.params.get('subtitleslang')
-            elif 'en' in sub_lang_list:
-                sub_lang = 'en'
+            if self._downloader.params.get('subtitleslangs', False):
+                reqested_langs = self._downloader.params.get('subtitleslangs')
+            elif 'en' in available_subs_list:
+                reqested_langs = ['en']
             else:
-                sub_lang = list(sub_lang_list.keys())[0]
-            if not sub_lang in sub_lang_list:
-                self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
-                return {}
-            sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
+                reqested_langs = [list(available_subs_list.keys())[0]]
+
+            sub_lang_list = {}
+            for sub_lang in reqested_langs:
+                if not sub_lang in available_subs_list:
+                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+                    continue
+                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
         subtitles = {}
         for sub_lang in sub_lang_list:
             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)