amara.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. class AmaraIE(InfoExtractor):
  5. _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
  6. _TESTS = [
  7. {
  8. 'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
  9. 'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
  10. 'info_dict': {
  11. 'id': 'h6ZuVdvYnfE',
  12. 'ext': 'mp4',
  13. 'title': 'Why jury trials are becoming less common',
  14. 'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
  15. 'thumbnail': r're:^https?://.*\.jpg$',
  16. 'subtitles': dict,
  17. 'upload_date': '20160813',
  18. 'uploader': 'PBS NewsHour',
  19. 'uploader_id': 'PBSNewsHour'
  20. }
  21. },
  22. {
  23. 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
  24. 'md5': '99392c75fa05d432a8f11df03612195e',
  25. 'info_dict': {
  26. 'id': '18622084',
  27. 'ext': 'mov',
  28. 'title': 'Vimeo at CES 2011!',
  29. 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  30. 'thumbnail': r're:^https?://.*\.jpg$',
  31. 'subtitles': dict,
  32. 'timestamp': 1294649110,
  33. 'upload_date': '20110110',
  34. 'uploader': 'Sam Morrill',
  35. 'uploader_id': 'sammorrill'
  36. }
  37. },
  38. {
  39. 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
  40. 'md5': 'd3970f08512738ee60c5807311ff5d3f',
  41. 'info_dict': {
  42. 'id': 'ChimamandaAdichie_2009G-transcript',
  43. 'ext': 'mp4',
  44. 'title': 'The danger of a single story',
  45. 'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
  46. 'thumbnail': r're:^https?://.*\.jpg$',
  47. 'subtitles': dict,
  48. 'upload_date': '20131206'
  49. }
  50. }
  51. ]
  52. def get_subtitles_for_language(self, language):
  53. return [{
  54. 'ext': type,
  55. 'url': language['subtitles_uri'].replace('format=json', 'format=' + type)
  56. } for type in ['vtt', 'srt', 'json']]
  57. def _real_extract(self, url):
  58. video_id = self._match_id(url)
  59. meta = self._download_json('https://amara.org/api/videos/%s/' % video_id, video_id, query={'format': 'json'})
  60. video_url = meta.get('all_urls')[0]
  61. subtitles = dict([(language['code'], self.get_subtitles_for_language(language)) for language in meta.get('languages', []) if language['published']])
  62. return {
  63. '_type': 'url_transparent',
  64. 'url': video_url,
  65. 'id': video_id,
  66. 'subtitles': subtitles,
  67. 'title': meta['title'],
  68. 'description': meta.get('description'),
  69. 'thumbnail': meta.get('thumbnail')
  70. }