globalplayer.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. clean_html,
  6. join_nonempty,
  7. merge_dicts,
  8. parse_duration,
  9. str_or_none,
  10. T,
  11. traverse_obj,
  12. unified_strdate,
  13. unified_timestamp,
  14. urlhandle_detect_ext,
  15. )
  16. class GlobalPlayerBaseIE(InfoExtractor):
  17. import re
  18. @classmethod
  19. def _match_valid_url(cls, url):
  20. return cls.re.match(cls._VALID_URL, url)
  21. def _get_page_props(self, url, video_id):
  22. webpage = self._download_webpage(url, video_id)
  23. return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
  24. def _request_ext(self, url, video_id):
  25. return urlhandle_detect_ext(self._request_webpage( # Server rejects HEAD requests
  26. url, video_id, note='Determining source extension'))
  27. @staticmethod
  28. def _clean_desc(x):
  29. x = clean_html(x)
  30. if x:
  31. x = x.replace('\xa0', ' ')
  32. return x
  33. def _extract_audio(self, episode, series):
  34. return merge_dicts({
  35. 'vcodec': 'none',
  36. }, traverse_obj(series, {
  37. 'series': 'title',
  38. 'series_id': 'id',
  39. 'thumbnail': 'imageUrl',
  40. 'uploader': 'itunesAuthor', # podcasts only
  41. }), traverse_obj(episode, {
  42. 'id': 'id',
  43. 'description': ('description', T(self._clean_desc)),
  44. 'duration': ('duration', T(parse_duration)),
  45. 'thumbnail': 'imageUrl',
  46. 'url': 'streamUrl',
  47. 'timestamp': (('pubDate', 'startDate'), T(unified_timestamp)),
  48. 'title': 'title',
  49. }, get_all=False), rev=True)
  50. class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
  51. _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
  52. _TESTS = [{
  53. 'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
  54. 'info_dict': {
  55. 'id': '2mx1E',
  56. 'ext': 'aac',
  57. 'display_id': 'smoothchill-uk',
  58. 'title': 're:^Smooth Chill.+$',
  59. 'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
  60. 'description': 'Music To Chill To',
  61. # 'live_status': 'is_live',
  62. 'is_live': True,
  63. },
  64. }, {
  65. # national station
  66. 'url': 'https://www.globalplayer.com/live/heart/uk/',
  67. 'info_dict': {
  68. 'id': '2mwx4',
  69. 'ext': 'aac',
  70. 'description': 'turn up the feel good!',
  71. 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
  72. # 'live_status': 'is_live',
  73. 'is_live': True,
  74. 'title': 're:^Heart UK.+$',
  75. 'display_id': 'heart-uk',
  76. },
  77. }, {
  78. # regional variation
  79. 'url': 'https://www.globalplayer.com/live/heart/london/',
  80. 'info_dict': {
  81. 'id': 'AMqg',
  82. 'ext': 'aac',
  83. 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
  84. 'title': 're:^Heart London.+$',
  85. # 'live_status': 'is_live',
  86. 'is_live': True,
  87. 'display_id': 'heart-london',
  88. 'description': 'turn up the feel good!',
  89. },
  90. }]
  91. def _real_extract(self, url):
  92. video_id = self._match_id(url)
  93. station = self._get_page_props(url, video_id)['station']
  94. stream_url = station['streamUrl']
  95. return merge_dicts({
  96. 'id': station['id'],
  97. 'display_id': (
  98. join_nonempty('brandSlug', 'slug', from_dict=station)
  99. or station.get('legacyStationPrefix')),
  100. 'url': stream_url,
  101. 'ext': self._request_ext(stream_url, video_id),
  102. 'vcodec': 'none',
  103. 'is_live': True,
  104. }, {
  105. 'title': self._live_title(traverse_obj(
  106. station, (('name', 'brandName'), T(str_or_none)),
  107. get_all=False)),
  108. }, traverse_obj(station, {
  109. 'description': 'tagline',
  110. 'thumbnail': 'brandLogo',
  111. }), rev=True)
  112. class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
  113. _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
  114. _TESTS = [{
  115. # "live playlist"
  116. 'url': 'https://www.globalplayer.com/playlists/8bLk/',
  117. 'info_dict': {
  118. 'id': '8bLk',
  119. 'ext': 'aac',
  120. # 'live_status': 'is_live',
  121. 'is_live': True,
  122. 'description': r're:(?s).+\bclassical\b.+\bClassic FM Hall [oO]f Fame\b',
  123. 'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
  124. 'title': 're:Classic FM Hall of Fame.+$'
  125. },
  126. }]
  127. def _real_extract(self, url):
  128. video_id = self._match_id(url)
  129. station = self._get_page_props(url, video_id)['playlistData']
  130. stream_url = station['streamUrl']
  131. return merge_dicts({
  132. 'id': video_id,
  133. 'url': stream_url,
  134. 'ext': self._request_ext(stream_url, video_id),
  135. 'vcodec': 'none',
  136. 'is_live': True,
  137. }, traverse_obj(station, {
  138. 'title': 'title',
  139. 'description': ('description', T(self._clean_desc)),
  140. 'thumbnail': 'image',
  141. }), rev=True)
  142. class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
  143. _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
  144. _TESTS = [{
  145. # podcast
  146. 'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
  147. 'playlist_mincount': 5,
  148. 'info_dict': {
  149. 'id': '42KuaM',
  150. 'title': 'Filthy Ritual',
  151. 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
  152. 'categories': ['Society & Culture', 'True Crime'],
  153. 'uploader': 'Global',
  154. 'description': r're:(?s).+\bscam\b.+?\bseries available now\b',
  155. },
  156. }, {
  157. # radio catchup
  158. 'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
  159. 'playlist_mincount': 2,
  160. 'info_dict': {
  161. 'id': '46vyD7z',
  162. 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
  163. 'title': 'Nick Ferrari',
  164. 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
  165. },
  166. }]
  167. def _real_extract(self, url):
  168. video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
  169. props = self._get_page_props(url, video_id)
  170. series = props['podcastInfo'] if podcast else props['catchupInfo']
  171. return merge_dicts({
  172. '_type': 'playlist',
  173. 'id': video_id,
  174. 'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
  175. series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
  176. 'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
  177. }, traverse_obj(series, {
  178. 'description': ('description', T(self._clean_desc)),
  179. 'thumbnail': 'imageUrl',
  180. 'title': 'title',
  181. 'uploader': 'itunesAuthor', # podcasts only
  182. }), rev=True)
  183. class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
  184. _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
  185. _TESTS = [{
  186. # podcast
  187. 'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
  188. 'info_dict': {
  189. 'id': '7DrfNnE',
  190. 'ext': 'mp3',
  191. 'title': 'Filthy Ritual - Trailer',
  192. 'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
  193. 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
  194. 'duration': 225.0,
  195. 'timestamp': 1681254900,
  196. 'series': 'Filthy Ritual',
  197. 'series_id': '42KuaM',
  198. 'upload_date': '20230411',
  199. 'uploader': 'Global',
  200. },
  201. }, {
  202. # radio catchup
  203. 'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
  204. 'only_matching': True,
  205. # expired: refresh the details with a current show for a full test
  206. 'info_dict': {
  207. 'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
  208. 'ext': 'm4a',
  209. 'timestamp': 1682056800,
  210. 'series': 'Nick Ferrari',
  211. 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
  212. 'upload_date': '20230421',
  213. 'series_id': '46vyD7z',
  214. 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
  215. 'title': 'Nick Ferrari',
  216. 'duration': 10800.0,
  217. },
  218. }]
  219. def _real_extract(self, url):
  220. video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
  221. props = self._get_page_props(url, video_id)
  222. episode = props['podcastEpisode'] if podcast else props['catchupEpisode']
  223. return self._extract_audio(
  224. episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})
  225. class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
  226. _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
  227. _TESTS = [{
  228. 'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
  229. 'info_dict': {
  230. 'id': '2JsSZ7Gm2uP',
  231. 'ext': 'mp4',
  232. 'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
  233. 'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
  234. 'upload_date': '20230420',
  235. 'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
  236. },
  237. }]
  238. def _real_extract(self, url):
  239. video_id = self._match_id(url)
  240. meta = self._get_page_props(url, video_id)['videoData']
  241. return merge_dicts({
  242. 'id': video_id,
  243. }, traverse_obj(meta, {
  244. 'url': 'url',
  245. 'thumbnail': ('image', 'url'),
  246. 'title': 'title',
  247. 'upload_date': ('publish_date', T(unified_strdate)),
  248. 'description': 'description',
  249. }), rev=True)