# nhk.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    urljoin,
)
  5. class NhkBaseIE(InfoExtractor):
  6. _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
  7. _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
  8. _TYPE_REGEX = r'/(?P<type>video|audio)/'
  9. def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
  10. return self._download_json(
  11. self._API_URL_TEMPLATE % (
  12. 'v' if is_video else 'r',
  13. 'clip' if is_clip else 'esd',
  14. 'episode' if is_episode else 'program',
  15. m_id, lang, '/all' if is_video else ''),
  16. m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
  17. def _extract_episode_info(self, url, episode=None):
  18. fetch_episode = episode is None
  19. lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
  20. if episode_id.isdigit():
  21. episode_id = episode_id[:4] + '-' + episode_id[4:]
  22. is_video = m_type == 'video'
  23. if fetch_episode:
  24. episode = self._call_api(
  25. episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
  26. title = episode.get('sub_title_clean') or episode['sub_title']
  27. def get_clean_field(key):
  28. return episode.get(key + '_clean') or episode.get(key)
  29. series = get_clean_field('title')
  30. thumbnails = []
  31. for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
  32. img_path = episode.get('image' + s)
  33. if not img_path:
  34. continue
  35. thumbnails.append({
  36. 'id': '%dp' % h,
  37. 'height': h,
  38. 'width': w,
  39. 'url': 'https://www3.nhk.or.jp' + img_path,
  40. })
  41. info = {
  42. 'id': episode_id + '-' + lang,
  43. 'title': '%s - %s' % (series, title) if series and title else title,
  44. 'description': get_clean_field('description'),
  45. 'thumbnails': thumbnails,
  46. 'series': series,
  47. 'episode': title,
  48. }
  49. if is_video:
  50. vod_id = episode['vod_id']
  51. info.update({
  52. '_type': 'url_transparent',
  53. 'ie_key': 'Piksel',
  54. 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id,
  55. 'id': vod_id,
  56. })
  57. else:
  58. if fetch_episode:
  59. audio_path = episode['audio']['audio']
  60. info['formats'] = self._extract_m3u8_formats(
  61. 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
  62. episode_id, 'm4a', entry_protocol='m3u8_native',
  63. m3u8_id='hls', fatal=False)
  64. for f in info['formats']:
  65. f['language'] = lang
  66. else:
  67. info.update({
  68. '_type': 'url_transparent',
  69. 'ie_key': NhkVodIE.ie_key(),
  70. 'url': url,
  71. })
  72. return info
  73. class NhkVodIE(NhkBaseIE):
  74. _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
  75. # Content available only for a limited period of time. Visit
  76. # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
  77. _TESTS = [{
  78. # video clip
  79. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
  80. 'md5': '256a1be14f48d960a7e61e2532d95ec3',
  81. 'info_dict': {
  82. 'id': 'a95j5iza',
  83. 'ext': 'mp4',
  84. 'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
  85. 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
  86. 'timestamp': 1565965194,
  87. 'upload_date': '20190816',
  88. },
  89. }, {
  90. # audio clip
  91. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
  92. 'info_dict': {
  93. 'id': 'r_inventions-20201104-1-en',
  94. 'ext': 'm4a',
  95. 'title': "Japan's Top Inventions - Miniature Video Cameras",
  96. 'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
  97. },
  98. 'params': {
  99. # m3u8 download
  100. 'skip_download': True,
  101. },
  102. }, {
  103. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
  104. 'only_matching': True,
  105. }, {
  106. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
  107. 'only_matching': True,
  108. }, {
  109. 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
  110. 'only_matching': True,
  111. }, {
  112. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
  113. 'only_matching': True,
  114. }]
  115. def _real_extract(self, url):
  116. return self._extract_episode_info(url)
  117. class NhkVodProgramIE(NhkBaseIE):
  118. _VALID_URL = r'%s/program%s(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
  119. _TESTS = [{
  120. # video program episodes
  121. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
  122. 'info_dict': {
  123. 'id': 'japanrailway',
  124. 'title': 'Japan Railway Journal',
  125. },
  126. 'playlist_mincount': 1,
  127. }, {
  128. # video program clips
  129. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
  130. 'info_dict': {
  131. 'id': 'japanrailway',
  132. 'title': 'Japan Railway Journal',
  133. },
  134. 'playlist_mincount': 5,
  135. }, {
  136. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
  137. 'only_matching': True,
  138. }, {
  139. # audio program
  140. 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
  141. 'only_matching': True,
  142. }]
  143. def _real_extract(self, url):
  144. lang, m_type, program_id, episode_type = re.match(self._VALID_URL, url).groups()
  145. episodes = self._call_api(
  146. program_id, lang, m_type == 'video', False, episode_type == 'clip')
  147. entries = []
  148. for episode in episodes:
  149. episode_path = episode.get('url')
  150. if not episode_path:
  151. continue
  152. entries.append(self._extract_episode_info(
  153. urljoin(url, episode_path), episode))
  154. program_title = None
  155. if entries:
  156. program_title = entries[0].get('series')
  157. return self.playlist_result(entries, program_id, program_title)