dplay.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_HTTPError
  6. from ..utils import (
  7. determine_ext,
  8. ExtractorError,
  9. float_or_none,
  10. int_or_none,
  11. unified_timestamp,
  12. )
  13. class DPlayIE(InfoExtractor):
  14. _VALID_URL = r'''(?x)https?://
  15. (?P<domain>
  16. (?:www\.)?(?P<host>d
  17. (?:
  18. play\.(?P<country>dk|fi|jp|se|no)|
  19. iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
  20. )
  21. )|
  22. (?P<subdomain_country>es|it)\.dplay\.com
  23. )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
  24. _TESTS = [{
  25. # non geo restricted, via secure api, unsigned download hls URL
  26. 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
  27. 'info_dict': {
  28. 'id': '13628',
  29. 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
  30. 'ext': 'mp4',
  31. 'title': 'Svensken lär sig njuta av livet',
  32. 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
  33. 'duration': 2649.856,
  34. 'timestamp': 1365453720,
  35. 'upload_date': '20130408',
  36. 'creator': 'Kanal 5',
  37. 'series': 'Nugammalt - 77 händelser som format Sverige',
  38. 'season_number': 1,
  39. 'episode_number': 1,
  40. },
  41. 'params': {
  42. 'format': 'bestvideo',
  43. 'skip_download': True,
  44. },
  45. }, {
  46. # geo restricted, via secure api, unsigned download hls URL
  47. 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
  48. 'info_dict': {
  49. 'id': '104465',
  50. 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
  51. 'ext': 'mp4',
  52. 'title': 'Ted Bundy: Mind Of A Monster',
  53. 'description': 'md5:8b780f6f18de4dae631668b8a9637995',
  54. 'duration': 5290.027,
  55. 'timestamp': 1570694400,
  56. 'upload_date': '20191010',
  57. 'creator': 'ID - Investigation Discovery',
  58. 'series': 'Ted Bundy: Mind Of A Monster',
  59. 'season_number': 1,
  60. 'episode_number': 1,
  61. },
  62. 'params': {
  63. 'format': 'bestvideo',
  64. 'skip_download': True,
  65. },
  66. }, {
  67. # disco-api
  68. 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
  69. 'info_dict': {
  70. 'id': '40206',
  71. 'display_id': 'i-kongens-klr/sesong-1-episode-7',
  72. 'ext': 'mp4',
  73. 'title': 'Episode 7',
  74. 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
  75. 'duration': 2611.16,
  76. 'timestamp': 1516726800,
  77. 'upload_date': '20180123',
  78. 'series': 'I kongens klær',
  79. 'season_number': 1,
  80. 'episode_number': 7,
  81. },
  82. 'params': {
  83. 'format': 'bestvideo',
  84. 'skip_download': True,
  85. },
  86. 'skip': 'Available for Premium users',
  87. }, {
  88. 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
  89. 'md5': '2b808ffb00fc47b884a172ca5d13053c',
  90. 'info_dict': {
  91. 'id': '6918',
  92. 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
  93. 'ext': 'mp4',
  94. 'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
  95. 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
  96. 'thumbnail': r're:^https?://.*\.jpe?g',
  97. 'upload_date': '20160524',
  98. 'timestamp': 1464076800,
  99. 'series': 'Biografie imbarazzanti',
  100. 'season_number': 1,
  101. 'episode': 'Episode 1',
  102. 'episode_number': 1,
  103. },
  104. }, {
  105. 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
  106. 'info_dict': {
  107. 'id': '21652',
  108. 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
  109. 'ext': 'mp4',
  110. 'title': 'Episodio 1',
  111. 'description': 'md5:b9dcff2071086e003737485210675f69',
  112. 'thumbnail': r're:^https?://.*\.png',
  113. 'upload_date': '20180709',
  114. 'timestamp': 1531173540,
  115. 'series': 'La fiebre del oro',
  116. 'season_number': 8,
  117. 'episode': 'Episode 1',
  118. 'episode_number': 1,
  119. },
  120. 'params': {
  121. 'skip_download': True,
  122. },
  123. }, {
  124. 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
  125. 'only_matching': True,
  126. }, {
  127. 'url': 'https://www.dplay.jp/video/gold-rush/24086',
  128. 'only_matching': True,
  129. }, {
  130. 'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
  131. 'only_matching': True,
  132. }, {
  133. 'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
  134. 'only_matching': True,
  135. }, {
  136. 'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
  137. 'only_matching': True,
  138. }, {
  139. 'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
  140. 'only_matching': True,
  141. }, {
  142. 'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
  143. 'only_matching': True,
  144. }, {
  145. 'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
  146. 'only_matching': True,
  147. }]
  148. def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
  149. geo_countries = [country.upper()]
  150. self._initialize_geo_bypass({
  151. 'countries': geo_countries,
  152. })
  153. disco_base = 'https://%s/' % disco_host
  154. token = self._download_json(
  155. disco_base + 'token', display_id, 'Downloading token',
  156. query={
  157. 'realm': realm,
  158. })['data']['attributes']['token']
  159. headers = {
  160. 'Referer': url,
  161. 'Authorization': 'Bearer ' + token,
  162. }
  163. video = self._download_json(
  164. disco_base + 'content/videos/' + display_id, display_id,
  165. headers=headers, query={
  166. 'fields[channel]': 'name',
  167. 'fields[image]': 'height,src,width',
  168. 'fields[show]': 'name',
  169. 'fields[tag]': 'name',
  170. 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
  171. 'include': 'images,primaryChannel,show,tags'
  172. })
  173. video_id = video['data']['id']
  174. info = video['data']['attributes']
  175. title = info['name'].strip()
  176. formats = []
  177. try:
  178. streaming = self._download_json(
  179. disco_base + 'playback/videoPlaybackInfo/' + video_id,
  180. display_id, headers=headers)['data']['attributes']['streaming']
  181. except ExtractorError as e:
  182. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
  183. info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
  184. error = info['errors'][0]
  185. error_code = error.get('code')
  186. if error_code == 'access.denied.geoblocked':
  187. self.raise_geo_restricted(countries=geo_countries)
  188. elif error_code == 'access.denied.missingpackage':
  189. self.raise_login_required()
  190. raise ExtractorError(info['errors'][0]['detail'], expected=True)
  191. raise
  192. for format_id, format_dict in streaming.items():
  193. if not isinstance(format_dict, dict):
  194. continue
  195. format_url = format_dict.get('url')
  196. if not format_url:
  197. continue
  198. ext = determine_ext(format_url)
  199. if format_id == 'dash' or ext == 'mpd':
  200. formats.extend(self._extract_mpd_formats(
  201. format_url, display_id, mpd_id='dash', fatal=False))
  202. elif format_id == 'hls' or ext == 'm3u8':
  203. formats.extend(self._extract_m3u8_formats(
  204. format_url, display_id, 'mp4',
  205. entry_protocol='m3u8_native', m3u8_id='hls',
  206. fatal=False))
  207. else:
  208. formats.append({
  209. 'url': format_url,
  210. 'format_id': format_id,
  211. })
  212. self._sort_formats(formats)
  213. creator = series = None
  214. tags = []
  215. thumbnails = []
  216. included = video.get('included') or []
  217. if isinstance(included, list):
  218. for e in included:
  219. attributes = e.get('attributes')
  220. if not attributes:
  221. continue
  222. e_type = e.get('type')
  223. if e_type == 'channel':
  224. creator = attributes.get('name')
  225. elif e_type == 'image':
  226. src = attributes.get('src')
  227. if src:
  228. thumbnails.append({
  229. 'url': src,
  230. 'width': int_or_none(attributes.get('width')),
  231. 'height': int_or_none(attributes.get('height')),
  232. })
  233. if e_type == 'show':
  234. series = attributes.get('name')
  235. elif e_type == 'tag':
  236. name = attributes.get('name')
  237. if name:
  238. tags.append(name)
  239. return {
  240. 'id': video_id,
  241. 'display_id': display_id,
  242. 'title': title,
  243. 'description': info.get('description'),
  244. 'duration': float_or_none(info.get('videoDuration'), 1000),
  245. 'timestamp': unified_timestamp(info.get('publishStart')),
  246. 'series': series,
  247. 'season_number': int_or_none(info.get('seasonNumber')),
  248. 'episode_number': int_or_none(info.get('episodeNumber')),
  249. 'creator': creator,
  250. 'tags': tags,
  251. 'thumbnails': thumbnails,
  252. 'formats': formats,
  253. }
  254. def _real_extract(self, url):
  255. mobj = re.match(self._VALID_URL, url)
  256. display_id = mobj.group('id')
  257. domain = mobj.group('domain').lstrip('www.')
  258. country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
  259. host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
  260. return self._get_disco_api_info(
  261. url, display_id, host, 'dplay' + country, country)