# dplay.py (13 KB)
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. from .common import InfoExtractor
  6. from ..compat import compat_HTTPError
  7. from ..utils import (
  8. determine_ext,
  9. ExtractorError,
  10. float_or_none,
  11. int_or_none,
  12. unified_timestamp,
  13. )
  14. class DPlayIE(InfoExtractor):
  15. _VALID_URL = r'''(?x)https?://
  16. (?P<domain>
  17. (?:www\.)?(?P<host>d
  18. (?:
  19. play\.(?P<country>dk|fi|jp|se|no)|
  20. iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
  21. )
  22. )|
  23. (?P<subdomain_country>es|it)\.dplay\.com
  24. )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
  25. _TESTS = [{
  26. # non geo restricted, via secure api, unsigned download hls URL
  27. 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
  28. 'info_dict': {
  29. 'id': '13628',
  30. 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
  31. 'ext': 'mp4',
  32. 'title': 'Svensken lär sig njuta av livet',
  33. 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
  34. 'duration': 2649.856,
  35. 'timestamp': 1365453720,
  36. 'upload_date': '20130408',
  37. 'creator': 'Kanal 5',
  38. 'series': 'Nugammalt - 77 händelser som format Sverige',
  39. 'season_number': 1,
  40. 'episode_number': 1,
  41. },
  42. 'params': {
  43. 'format': 'bestvideo',
  44. 'skip_download': True,
  45. },
  46. }, {
  47. # geo restricted, via secure api, unsigned download hls URL
  48. 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
  49. 'info_dict': {
  50. 'id': '104465',
  51. 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
  52. 'ext': 'mp4',
  53. 'title': 'Ted Bundy: Mind Of A Monster',
  54. 'description': 'md5:8b780f6f18de4dae631668b8a9637995',
  55. 'duration': 5290.027,
  56. 'timestamp': 1570694400,
  57. 'upload_date': '20191010',
  58. 'creator': 'ID - Investigation Discovery',
  59. 'series': 'Ted Bundy: Mind Of A Monster',
  60. 'season_number': 1,
  61. 'episode_number': 1,
  62. },
  63. 'params': {
  64. 'format': 'bestvideo',
  65. 'skip_download': True,
  66. },
  67. }, {
  68. # disco-api
  69. 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
  70. 'info_dict': {
  71. 'id': '40206',
  72. 'display_id': 'i-kongens-klr/sesong-1-episode-7',
  73. 'ext': 'mp4',
  74. 'title': 'Episode 7',
  75. 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
  76. 'duration': 2611.16,
  77. 'timestamp': 1516726800,
  78. 'upload_date': '20180123',
  79. 'series': 'I kongens klær',
  80. 'season_number': 1,
  81. 'episode_number': 7,
  82. },
  83. 'params': {
  84. 'format': 'bestvideo',
  85. 'skip_download': True,
  86. },
  87. 'skip': 'Available for Premium users',
  88. }, {
  89. 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
  90. 'md5': '2b808ffb00fc47b884a172ca5d13053c',
  91. 'info_dict': {
  92. 'id': '6918',
  93. 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
  94. 'ext': 'mp4',
  95. 'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
  96. 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
  97. 'thumbnail': r're:^https?://.*\.jpe?g',
  98. 'upload_date': '20160524',
  99. 'timestamp': 1464076800,
  100. 'series': 'Biografie imbarazzanti',
  101. 'season_number': 1,
  102. 'episode': 'Episode 1',
  103. 'episode_number': 1,
  104. },
  105. }, {
  106. 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
  107. 'info_dict': {
  108. 'id': '21652',
  109. 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
  110. 'ext': 'mp4',
  111. 'title': 'Episodio 1',
  112. 'description': 'md5:b9dcff2071086e003737485210675f69',
  113. 'thumbnail': r're:^https?://.*\.png',
  114. 'upload_date': '20180709',
  115. 'timestamp': 1531173540,
  116. 'series': 'La fiebre del oro',
  117. 'season_number': 8,
  118. 'episode': 'Episode 1',
  119. 'episode_number': 1,
  120. },
  121. 'params': {
  122. 'skip_download': True,
  123. },
  124. }, {
  125. 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
  126. 'only_matching': True,
  127. }, {
  128. 'url': 'https://www.dplay.jp/video/gold-rush/24086',
  129. 'only_matching': True,
  130. }, {
  131. 'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
  132. 'only_matching': True,
  133. }, {
  134. 'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
  135. 'only_matching': True,
  136. }, {
  137. 'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
  138. 'only_matching': True,
  139. }, {
  140. 'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
  141. 'only_matching': True,
  142. }, {
  143. 'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
  144. 'only_matching': True,
  145. }, {
  146. 'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
  147. 'only_matching': True,
  148. }]
  149. def _process_errors(self, e, geo_countries):
  150. info = self._parse_json(e.cause.read().decode('utf-8'), None)
  151. error = info['errors'][0]
  152. error_code = error.get('code')
  153. if error_code == 'access.denied.geoblocked':
  154. self.raise_geo_restricted(countries=geo_countries)
  155. elif error_code in ('access.denied.missingpackage', 'invalid.token'):
  156. raise ExtractorError(
  157. 'This video is only available for registered users. You may want to use --cookies.', expected=True)
  158. raise ExtractorError(info['errors'][0]['detail'], expected=True)
  159. def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
  160. headers['Authorization'] = 'Bearer ' + self._download_json(
  161. disco_base + 'token', display_id, 'Downloading token',
  162. query={
  163. 'realm': realm,
  164. })['data']['attributes']['token']
  165. def _download_video_playback_info(self, disco_base, video_id, headers):
  166. streaming = self._download_json(
  167. disco_base + 'playback/videoPlaybackInfo/' + video_id,
  168. video_id, headers=headers)['data']['attributes']['streaming']
  169. streaming_list = []
  170. for format_id, format_dict in streaming.items():
  171. streaming_list.append({
  172. 'type': format_id,
  173. 'url': format_dict.get('url'),
  174. })
  175. return streaming_list
  176. def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
  177. geo_countries = [country.upper()]
  178. self._initialize_geo_bypass({
  179. 'countries': geo_countries,
  180. })
  181. disco_base = 'https://%s/' % disco_host
  182. headers = {
  183. 'Referer': url,
  184. }
  185. self._update_disco_api_headers(headers, disco_base, display_id, realm)
  186. try:
  187. video = self._download_json(
  188. disco_base + 'content/videos/' + display_id, display_id,
  189. headers=headers, query={
  190. 'fields[channel]': 'name',
  191. 'fields[image]': 'height,src,width',
  192. 'fields[show]': 'name',
  193. 'fields[tag]': 'name',
  194. 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
  195. 'include': 'images,primaryChannel,show,tags'
  196. })
  197. except ExtractorError as e:
  198. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  199. self._process_errors(e, geo_countries)
  200. raise
  201. video_id = video['data']['id']
  202. info = video['data']['attributes']
  203. title = info['name'].strip()
  204. formats = []
  205. try:
  206. streaming = self._download_video_playback_info(
  207. disco_base, video_id, headers)
  208. except ExtractorError as e:
  209. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
  210. self._process_errors(e, geo_countries)
  211. raise
  212. for format_dict in streaming:
  213. if not isinstance(format_dict, dict):
  214. continue
  215. format_url = format_dict.get('url')
  216. if not format_url:
  217. continue
  218. format_id = format_dict.get('type')
  219. ext = determine_ext(format_url)
  220. if format_id == 'dash' or ext == 'mpd':
  221. formats.extend(self._extract_mpd_formats(
  222. format_url, display_id, mpd_id='dash', fatal=False))
  223. elif format_id == 'hls' or ext == 'm3u8':
  224. formats.extend(self._extract_m3u8_formats(
  225. format_url, display_id, 'mp4',
  226. entry_protocol='m3u8_native', m3u8_id='hls',
  227. fatal=False))
  228. else:
  229. formats.append({
  230. 'url': format_url,
  231. 'format_id': format_id,
  232. })
  233. self._sort_formats(formats)
  234. creator = series = None
  235. tags = []
  236. thumbnails = []
  237. included = video.get('included') or []
  238. if isinstance(included, list):
  239. for e in included:
  240. attributes = e.get('attributes')
  241. if not attributes:
  242. continue
  243. e_type = e.get('type')
  244. if e_type == 'channel':
  245. creator = attributes.get('name')
  246. elif e_type == 'image':
  247. src = attributes.get('src')
  248. if src:
  249. thumbnails.append({
  250. 'url': src,
  251. 'width': int_or_none(attributes.get('width')),
  252. 'height': int_or_none(attributes.get('height')),
  253. })
  254. if e_type == 'show':
  255. series = attributes.get('name')
  256. elif e_type == 'tag':
  257. name = attributes.get('name')
  258. if name:
  259. tags.append(name)
  260. return {
  261. 'id': video_id,
  262. 'display_id': display_id,
  263. 'title': title,
  264. 'description': info.get('description'),
  265. 'duration': float_or_none(info.get('videoDuration'), 1000),
  266. 'timestamp': unified_timestamp(info.get('publishStart')),
  267. 'series': series,
  268. 'season_number': int_or_none(info.get('seasonNumber')),
  269. 'episode_number': int_or_none(info.get('episodeNumber')),
  270. 'creator': creator,
  271. 'tags': tags,
  272. 'thumbnails': thumbnails,
  273. 'formats': formats,
  274. }
  275. def _real_extract(self, url):
  276. mobj = re.match(self._VALID_URL, url)
  277. display_id = mobj.group('id')
  278. domain = mobj.group('domain').lstrip('www.')
  279. country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
  280. host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
  281. return self._get_disco_api_info(
  282. url, display_id, host, 'dplay' + country, country)
  283. class DiscoveryPlusIE(DPlayIE):
  284. _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video/(?P<id>[^/]+/[^/]+)'
  285. _TESTS = [{
  286. 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
  287. 'info_dict': {
  288. 'id': '1140794',
  289. 'display_id': 'property-brothers-forever-home/food-and-family',
  290. 'ext': 'mp4',
  291. 'title': 'Food and Family',
  292. 'description': 'The brothers help a Richmond family expand their single-level home.',
  293. 'duration': 2583.113,
  294. 'timestamp': 1609304400,
  295. 'upload_date': '20201230',
  296. 'creator': 'HGTV',
  297. 'series': 'Property Brothers: Forever Home',
  298. 'season_number': 1,
  299. 'episode_number': 1,
  300. },
  301. 'skip': 'Available for Premium users',
  302. }]
  303. def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
  304. headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
  305. def _download_video_playback_info(self, disco_base, video_id, headers):
  306. return self._download_json(
  307. disco_base + 'playback/v3/videoPlaybackInfo',
  308. video_id, headers=headers, data=json.dumps({
  309. 'deviceInfo': {
  310. 'adBlocker': False,
  311. },
  312. 'videoId': video_id,
  313. 'wisteriaProperties': {
  314. 'platform': 'desktop',
  315. },
  316. }).encode('utf-8'))['data']['attributes']['streaming']
  317. def _real_extract(self, url):
  318. display_id = self._match_id(url)
  319. return self._get_disco_api_info(
  320. url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')