acast.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. clean_html,
  7. int_or_none,
  8. parse_iso8601,
  9. )
  10. class ACastBaseIE(InfoExtractor):
  11. def _extract_episode(self, episode, show_info):
  12. title = episode['title']
  13. info = {
  14. 'id': episode['id'],
  15. 'display_id': episode.get('episodeUrl'),
  16. 'url': episode['url'],
  17. 'title': title,
  18. 'description': clean_html(episode.get('description') or episode.get('summary')),
  19. 'thumbnail': episode.get('image'),
  20. 'timestamp': parse_iso8601(episode.get('publishDate')),
  21. 'duration': int_or_none(episode.get('duration')),
  22. 'filesize': int_or_none(episode.get('contentLength')),
  23. 'season_number': int_or_none(episode.get('season')),
  24. 'episode': title,
  25. 'episode_number': int_or_none(episode.get('episode')),
  26. }
  27. info.update(show_info)
  28. return info
  29. def _extract_show_info(self, show):
  30. return {
  31. 'creator': show.get('author'),
  32. 'series': show.get('title'),
  33. }
  34. def _call_api(self, path, video_id, query=None):
  35. return self._download_json(
  36. 'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query)
  37. class ACastIE(ACastBaseIE):
  38. IE_NAME = 'acast'
  39. _VALID_URL = r'''(?x)
  40. https?://
  41. (?:
  42. (?:(?:embed|www)\.)?acast\.com/|
  43. play\.acast\.com/s/
  44. )
  45. (?P<channel>[^/]+)/(?P<id>[^/#?]+)
  46. '''
  47. _TESTS = [{
  48. 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
  49. 'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
  50. 'info_dict': {
  51. 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
  52. 'ext': 'mp3',
  53. 'title': '2. Raggarmordet - Röster ur det förflutna',
  54. 'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
  55. 'timestamp': 1477346700,
  56. 'upload_date': '20161024',
  57. 'duration': 2766,
  58. 'creator': 'Anton Berg & Martin Johnson',
  59. 'series': 'Spår',
  60. 'episode': '2. Raggarmordet - Röster ur det förflutna',
  61. }
  62. }, {
  63. 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
  64. 'only_matching': True,
  65. }, {
  66. 'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
  67. 'only_matching': True,
  68. }, {
  69. 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
  70. 'only_matching': True,
  71. }]
  72. def _real_extract(self, url):
  73. channel, display_id = re.match(self._VALID_URL, url).groups()
  74. episode = self._call_api(
  75. '%s/episodes/%s' % (channel, display_id),
  76. display_id, {'showInfo': 'true'})
  77. return self._extract_episode(
  78. episode, self._extract_show_info(episode.get('show') or {}))
  79. class ACastChannelIE(ACastBaseIE):
  80. IE_NAME = 'acast:channel'
  81. _VALID_URL = r'''(?x)
  82. https?://
  83. (?:
  84. (?:www\.)?acast\.com/|
  85. play\.acast\.com/s/
  86. )
  87. (?P<id>[^/#?]+)
  88. '''
  89. _TESTS = [{
  90. 'url': 'https://www.acast.com/todayinfocus',
  91. 'info_dict': {
  92. 'id': '4efc5294-5385-4847-98bd-519799ce5786',
  93. 'title': 'Today in Focus',
  94. 'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
  95. },
  96. 'playlist_mincount': 200,
  97. }, {
  98. 'url': 'http://play.acast.com/s/ft-banking-weekly',
  99. 'only_matching': True,
  100. }]
  101. @classmethod
  102. def suitable(cls, url):
  103. return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
  104. def _real_extract(self, url):
  105. show_slug = self._match_id(url)
  106. show = self._call_api(show_slug, show_slug)
  107. show_info = self._extract_show_info(show)
  108. entries = []
  109. for episode in (show.get('episodes') or []):
  110. entries.append(self._extract_episode(episode, show_info))
  111. return self.playlist_result(
  112. entries, show.get('id'), show.get('title'), show.get('description'))