subtitles.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. import socket
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. compat_http_client,
  5. compat_urllib_error,
  6. compat_urllib_request,
  7. compat_str,
  8. )
  9. class SubtitlesIE(InfoExtractor):
  10. def report_video_subtitles_available(self, video_id, sub_lang_list):
  11. """Report available subtitles."""
  12. sub_lang = ",".join(list(sub_lang_list.keys()))
  13. self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
  14. def _list_available_subtitles(self, video_id):
  15. sub_lang_list = self._get_available_subtitles(video_id)
  16. self.report_video_subtitles_available(video_id, sub_lang_list)
  17. def _extract_subtitles(self, video_id):
  18. """
  19. Return a dictionary: {language: subtitles} or {} if the subtitles
  20. couldn't be found
  21. """
  22. sub_lang_list = self._get_available_subtitles(video_id)
  23. sub_format = self._downloader.params.get('subtitlesformat')
  24. if not sub_lang_list: #There was some error, it didn't get the available subtitles
  25. return {}
  26. if self._downloader.params.get('writesubtitles', False):
  27. if self._downloader.params.get('subtitleslang', False):
  28. sub_lang = self._downloader.params.get('subtitleslang')
  29. elif 'en' in sub_lang_list:
  30. sub_lang = 'en'
  31. else:
  32. sub_lang = list(sub_lang_list.keys())[0]
  33. if not sub_lang in sub_lang_list:
  34. self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
  35. return {}
  36. sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
  37. subtitles = {}
  38. for sub_lang in sub_lang_list:
  39. subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
  40. if subtitle:
  41. subtitles[sub_lang] = subtitle
  42. return subtitles
  43. def _request_subtitle(self, sub_lang, sub_name, video_id, format):
  44. """ Return the subtitle as a string or None if they are not found """
  45. # return (u'Did not fetch video subtitles for %s' % sub_lang, None, None)
  46. self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
  47. url = self._get_subtitle_url(sub_lang, sub_name, video_id, format)
  48. try:
  49. sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
  50. except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  51. self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
  52. return
  53. if not sub:
  54. self._downloader.report_warning(u'Did not fetch video subtitles')
  55. return
  56. return sub
  57. def _get_available_subtitles(self, video_id):
  58. """Get available subtitles. Redefine in subclasses."""
  59. """returns {(lang, url)} """
  60. # return {}
  61. pass
  62. def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
  63. """returns the url for the given subtitle. Redefine in subclasses."""
  64. pass
  65. def _request_automatic_caption(self, video_id, webpage):
  66. """Request automatic caption. Redefine in subclasses."""
  67. """returns a tuple of ... """
  68. # return [(err_msg, None, None)]
  69. pass