nrk.py 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_str,
  7. compat_urllib_parse_unquote,
  8. )
  9. from ..utils import (
  10. ExtractorError,
  11. int_or_none,
  12. js_to_json,
  13. NO_DEFAULT,
  14. parse_age_limit,
  15. parse_duration,
  16. try_get,
  17. )
  class NRKBaseIE(InfoExtractor):
      """Shared extraction logic for NRK extractors backed by the mediaelement API.

      Subclasses are expected to define ``_VALID_URL`` and ``_API_HOSTS`` (the
      candidate API host names tried in order).
      """

      _GEO_COUNTRIES = ['NO']

      # Host that answered the last successful request; once set it is the only
      # host queried by later calls.
      _api_host = None

      def _real_extract(self, url):
          """Build a playlist result from the mediaelement JSON of the matched id.

          Returns the value of ``self.playlist_result`` holding one entry per
          playable media asset, or a single entry built from ``mediaUrl`` when
          no asset is usable.

          Raises ExtractorError when the API yields no usable JSON or no
          playable media; geo blocking is reported via
          ``self.raise_geo_restricted``.
          """
          video_id = self._match_id(url)

          api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS

          data = None
          for api_host in api_hosts:
              data = self._download_json(
                  'http://%s/mediaelement/%s' % (api_host, video_id),
                  video_id, 'Downloading mediaelement JSON',
                  # Only a failure on the last candidate host is fatal
                  fatal=api_host == api_hosts[-1])
              if not data:
                  continue
              self._api_host = api_host
              break

          # An empty or non-object response used to surface as a bare
          # KeyError/AttributeError further down
          if not data or not isinstance(data, dict):
              raise ExtractorError(
                  'Unable to download mediaelement JSON', video_id=video_id)

          title = data.get('fullTitle') or data.get('mainTitle') or data['title']
          video_id = data.get('id') or video_id

          entries = []

          conviva = data.get('convivaStatistics')
          if not isinstance(conviva, dict):
              conviva = {}
          live = (data.get('mediaElementType') == 'Live'
                  or data.get('isLive') is True or conviva.get('isLive'))

          def make_title(t):
              return self._live_title(t) if live else t

          media_assets = data.get('mediaAssets')
          if media_assets and isinstance(media_assets, list):
              def video_id_and_title(idx):
                  return ((video_id, title) if len(media_assets) == 1
                          else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))

              for num, asset in enumerate(media_assets, 1):
                  # Skip malformed (e.g. null) items instead of crashing on .get
                  if not isinstance(asset, dict):
                      continue
                  asset_url = asset.get('url')
                  if not asset_url:
                      continue
                  formats = self._extract_akamai_formats(asset_url, video_id)
                  if not formats:
                      continue
                  self._sort_formats(formats)

                  # Some f4m streams may not work with hdcore in fragments' URLs
                  for f in formats:
                      extra_param = f.get('extra_param_to_segment_url')
                      if extra_param and 'hdcore' in extra_param:
                          del f['extra_param_to_segment_url']

                  entry_id, entry_title = video_id_and_title(num)
                  duration = parse_duration(asset.get('duration'))
                  subtitles = {}
                  for subtitle in ('webVtt', 'timedText'):
                      subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
                      if subtitle_url:
                          subtitles.setdefault('no', []).append({
                              'url': compat_urllib_parse_unquote(subtitle_url)
                          })
                  entries.append({
                      'id': asset.get('carrierId') or entry_id,
                      'title': make_title(entry_title),
                      'duration': duration,
                      'subtitles': subtitles,
                      'formats': formats,
                  })

          # Fall back to the top-level media URL when no asset produced formats
          if not entries:
              media_url = data.get('mediaUrl')
              if media_url:
                  formats = self._extract_akamai_formats(media_url, video_id)
                  self._sort_formats(formats)
                  duration = parse_duration(data.get('duration'))
                  entries = [{
                      'id': video_id,
                      'title': make_title(title),
                      'duration': duration,
                      'formats': formats,
                  }]

          if not entries:
              MESSAGES = {
                  'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
                  'ProgramRightsHasExpired': 'Programmet har gått ut',
                  'NoProgramRights': 'Ikke tilgjengelig',
                  'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
              }
              # "or ''" also covers an explicit JSON null, which would make the
              # substring test below raise TypeError
              message_type = data.get('messageType') or ''
              # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
              if 'IsGeoBlocked' in message_type:
                  self.raise_geo_restricted(
                      msg=MESSAGES.get('ProgramIsGeoBlocked'),
                      countries=self._GEO_COUNTRIES)
              raise ExtractorError(
                  '%s said: %s' % (self.IE_NAME, MESSAGES.get(
                      message_type, message_type)),
                  expected=True)

          series = conviva.get('seriesName') or data.get('seriesTitle')
          episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')

          season_number = None
          episode_number = None
          if data.get('mediaElementType') == 'Episode':
              # try_get tolerates missing, null or mistyped intermediate values
              _season_episode = (
                  try_get(
                      data, lambda x: x['scoresStatistics']['springStreamStream'],
                      compat_str)
                  or try_get(data, lambda x: x['relativeOriginUrl'], compat_str)
                  or '')
              EPISODENUM_RE = [
                  r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
                  r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
              ]
              season_number = int_or_none(self._search_regex(
                  EPISODENUM_RE, _season_episode, 'season number',
                  default=None, group='season'))
              episode_number = int_or_none(self._search_regex(
                  EPISODENUM_RE, _season_episode, 'episode number',
                  default=None, group='episode'))

          thumbnails = None
          images = data.get('images')
          if images and isinstance(images, dict):
              web_images = images.get('webImages')
              if isinstance(web_images, list):
                  thumbnails = [{
                      'url': image['imageUrl'],
                      'width': int_or_none(image.get('width')),
                      'height': int_or_none(image.get('height')),
                  } for image in web_images
                      if isinstance(image, dict) and image.get('imageUrl')]

          description = data.get('description')
          category = try_get(data, lambda x: x['mediaAnalytics']['category'])

          common_info = {
              'description': description,
              'series': series,
              'episode': episode,
              'season_number': season_number,
              'episode_number': episode_number,
              'categories': [category] if category else None,
              'age_limit': parse_age_limit(data.get('legalAge')),
              'thumbnails': thumbnails,
          }

          vcodec = 'none' if data.get('mediaType') == 'Audio' else None

          for entry in entries:
              entry.update(common_info)
              # NOTE: this assigns vcodec on every format, including None for
              # non-audio media
              for f in entry['formats']:
                  f['vcodec'] = vcodec

          points = data.get('shortIndexPoints')
          if isinstance(points, list):
              chapters = []
              # next_num is the list index of the point following the current one
              for next_num, point in enumerate(points, start=1):
                  if not isinstance(point, dict):
                      continue
                  start_time = parse_duration(point.get('startPoint'))
                  if start_time is None:
                      continue
                  if next_num == len(points):
                      # The last chapter runs until the end of the media
                      end_point = data.get('duration')
                  else:
                      next_point = points[next_num]
                      end_point = (
                          next_point.get('startPoint')
                          if isinstance(next_point, dict) else None)
                  end_time = parse_duration(end_point)
                  if end_time is None:
                      continue
                  chapters.append({
                      'start_time': start_time,
                      'end_time': end_time,
                      'title': point.get('title'),
                  })
              # Chapters are only attached when there is exactly one entry
              if chapters and len(entries) == 1:
                  entries[0]['chapters'] = chapters

          return self.playlist_result(entries, video_id, title, description)
  class NRKIE(NRKBaseIE):
      """Extractor for ``nrk:`` ids, nrk.no ``/video/PS*`` pages and v8 psapi
      mediaelement URLs; the extraction itself is inherited from NRKBaseIE."""

      _VALID_URL = r'''(?x)
                          (?:
                              nrk:|
                              https?://
                                  (?:
                                      (?:www\.)?nrk\.no/video/PS\*|
                                      v8[-.]psapi\.nrk\.no/mediaelement/
                                  )
                              )
                              (?P<id>[^?#&]+)
                          '''

      # API hosts tried in order by NRKBaseIE._real_extract
      _API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')

      _TESTS = [
          {
              # video
              'url': 'http://www.nrk.no/video/PS*150533',
              'md5': '706f34cdf1322577589e369e522b50ef',
              'info_dict': {
                  'id': '150533',
                  'ext': 'mp4',
                  'title': 'Dompap og andre fugler i Piip-Show',
                  'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
                  'duration': 262,
              }
          },
          {
              # audio
              'url': 'http://www.nrk.no/video/PS*154915',
              # MD5 is unstable
              'info_dict': {
                  'id': '154915',
                  'ext': 'flv',
                  'title': 'Slik høres internett ut når du er blind',
                  'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
                  'duration': 20,
              }
          },
          {
              'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
              'only_matching': True,
          },
          {
              'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
              'only_matching': True,
          },
          {
              'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
              'only_matching': True,
          },
      ]
  class NRKTVIE(NRKBaseIE):
      """Extractor for tv.nrk.no / radio.nrk.no (and nrksuper) programme and
      series-episode pages; the extraction itself is inherited from NRKBaseIE."""

      IE_DESC = 'NRK TV and NRK Radio'

      # Programme id: four letters followed by eight digits; reused by sibling
      # extractors in this module
      _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'

      _VALID_URL = r'''(?x)
                          https?://
                              (?:tv|radio)\.nrk(?:super)?\.no/
                              (?:serie(?:/[^/]+){1,2}|program)/
                              (?![Ee]pisodes)%s
                              (?:/\d{2}-\d{2}-\d{4})?
                              (?:\#del=(?P<part_id>\d+))?
                      ''' % _EPISODE_RE

      # API hosts tried in order by NRKBaseIE._real_extract
      _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')

      _TESTS = [
          {
              'url': 'https://tv.nrk.no/program/MDDP12000117',
              'md5': '8270824df46ec629b66aeaa5796b36fb',
              'info_dict': {
                  'id': 'MDDP12000117AA',
                  'ext': 'mp4',
                  'title': 'Alarm Trolltunga',
                  'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
                  'duration': 2223,
                  'age_limit': 6,
              },
          },
          {
              'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
              'md5': '9a167e54d04671eb6317a37b7bc8a280',
              'info_dict': {
                  'id': 'MUHH48000314AA',
                  'ext': 'mp4',
                  'title': '20 spørsmål 23.05.2014',
                  'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
                  'duration': 1741,
                  'series': '20 spørsmål',
                  'episode': '23.05.2014',
              },
              'skip': 'NoProgramRights',
          },
          {
              'url': 'https://tv.nrk.no/program/mdfp15000514',
              'info_dict': {
                  'id': 'MDFP15000514CA',
                  'ext': 'mp4',
                  'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
                  'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
                  'duration': 4605,
                  'series': 'Kunnskapskanalen',
                  'episode': '24.05.2014',
              },
              'params': {
                  'skip_download': True,
              },
          },
          {
              # single playlist video
              'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
              'info_dict': {
                  'id': 'MSPO40010515-part2',
                  'ext': 'flv',
                  'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
                  'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
              },
              'params': {
                  'skip_download': True,
              },
              'expected_warnings': ['Video is geo restricted'],
              'skip': 'particular part is not supported currently',
          },
          {
              'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
              'playlist': [
                  {
                      'info_dict': {
                          'id': 'MSPO40010515AH',
                          'ext': 'mp4',
                          'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
                          'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
                          'duration': 772,
                          'series': 'Tour de Ski',
                          'episode': '06.01.2015',
                      },
                      'params': {
                          'skip_download': True,
                      },
                  },
                  {
                      'info_dict': {
                          'id': 'MSPO40010515BH',
                          'ext': 'mp4',
                          'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
                          'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
                          'duration': 6175,
                          'series': 'Tour de Ski',
                          'episode': '06.01.2015',
                      },
                      'params': {
                          'skip_download': True,
                      },
                  },
              ],
              'info_dict': {
                  'id': 'MSPO40010515',
                  'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
                  'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
              },
              'expected_warnings': ['Video is geo restricted'],
          },
          {
              'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
              'info_dict': {
                  'id': 'KMTE50001317AA',
                  'ext': 'mp4',
                  'title': 'Anno 13:30',
                  'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
                  'duration': 2340,
                  'series': 'Anno',
                  'episode': '13:30',
                  'season_number': 3,
                  'episode_number': 13,
              },
              'params': {
                  'skip_download': True,
              },
          },
          {
              'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
              'info_dict': {
                  'id': 'MUHH46000317AA',
                  'ext': 'mp4',
                  'title': 'Nytt på Nytt 27.01.2017',
                  'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
                  'duration': 1796,
                  'series': 'Nytt på nytt',
                  'episode': '27.01.2017',
              },
              'params': {
                  'skip_download': True,
              },
          },
          {
              'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
              'only_matching': True,
          },
          {
              'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
              'only_matching': True,
          },
      ]
  class NRKTVEpisodeIE(InfoExtractor):
      """Resolve ``/serie/<name>/sesong/<n>/episode/<n>`` pages to an ``nrk:`` id
      and delegate the actual extraction to NRKIE."""

      _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
      _TESTS = [{
          'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
          'info_dict': {
              'id': 'MUHH36005220BA',
              'ext': 'mp4',
              'title': 'Kro, krig og kjærlighet 2:6',
              'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
              'duration': 1563,
              'series': 'Hellums kro',
              'season_number': 1,
              'episode_number': 2,
              'episode': '2:6',
              'age_limit': 6,
          },
          'params': {
              'skip_download': True,
          },
      }, {
          'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
          'info_dict': {
              'id': 'MSUI14000816AA',
              'ext': 'mp4',
              'title': 'Backstage 8:30',
              'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
              'duration': 1320,
              'series': 'Backstage',
              'season_number': 1,
              'episode_number': 8,
              'episode': '8:30',
          },
          'params': {
              'skip_download': True,
          },
          'skip': 'ProgramRightsHasExpired',
      }]

      def _real_extract(self, url):
          """Return a ``url_transparent`` result pointing at ``nrk:<program id>``.

          The program id is taken from the JSON-LD ``@id``, then the
          ``nrk:program-id`` meta tag, then a ``data-program-id`` attribute.

          Raises ExtractorError when the id found does not look like an NRK
          program id.
          """
          display_id = self._match_id(url)

          webpage = self._download_webpage(url, display_id)

          info = self._search_json_ld(webpage, display_id, default={})
          nrk_id = info.get('@id') or self._html_search_meta(
              'nrk:program-id', webpage, default=None) or self._search_regex(
              r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage,
              'nrk id')

          # Explicit validation instead of ``assert``: asserts are stripped
          # under ``python -O`` and scraped data is not a programming invariant
          if (not isinstance(nrk_id, compat_str)
                  or not re.match(NRKTVIE._EPISODE_RE, nrk_id)):
              raise ExtractorError(
                  'Unexpected NRK program id: %s' % nrk_id,
                  video_id=display_id)

          info.update({
              '_type': 'url_transparent',
              'id': nrk_id,
              'url': 'nrk:%s' % nrk_id,
              'ie_key': NRKIE.ie_key(),
          })
          return info
  class NRKTVSerieBaseIE(InfoExtractor):
      """Helpers shared by the season and series extractors for turning the
      series JSON embedded in a page into ``nrk:`` URL results."""

      def _extract_series(self, webpage, display_id, fatal=True):
          """Return the series dict embedded in ``webpage``, or None.

          With ``fatal=True`` a missing config blob makes ``_search_regex``
          raise; with ``fatal=False`` an empty config is parsed instead and
          None is returned.
          """
          config = self._parse_json(
              self._search_regex(
                  (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
                   r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
                  webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
              display_id, fatal=False, transform_source=js_to_json)
          if not config:
              return
          return try_get(
              config,
              (lambda x: x['initialState']['series'], lambda x: x['series']),
              dict)

      def _extract_seasons(self, seasons):
          """Return the entries of every season in ``seasons`` ([] if not a list)."""
          if not isinstance(seasons, list):
              return []
          entries = []
          for season in seasons:
              entries.extend(self._extract_episodes(season))
          return entries

      def _extract_episodes(self, season):
          """Return the entries for ``season['episodes']`` ([] if not a dict)."""
          if not isinstance(season, dict):
              return []
          return self._extract_entries(season.get('episodes'))

      def _extract_entries(self, entry_list):
          """Return one ``nrk:`` URL result per item carrying a string ``prfId``."""
          if not isinstance(entry_list, list):
              return []
          entries = []
          for episode in entry_list:
              # Same defensive type check as the sibling helpers: a null or
              # otherwise malformed item must not abort the whole playlist
              if not isinstance(episode, dict):
                  continue
              nrk_id = episode.get('prfId')
              if not nrk_id or not isinstance(nrk_id, compat_str):
                  continue
              entries.append(self.url_result(
                  'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
          return entries
  class NRKTVSeasonIE(NRKTVSerieBaseIE):
      """Extractor for a single season page (``/serie/<name>/sesong/<n>``)."""

      _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
      _TEST = {
          'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
          'info_dict': {
              'id': '1',
              'title': 'Sesong 1',
          },
          'playlist_mincount': 30,
      }

      @classmethod
      def suitable(cls, url):
          """Defer to NRKTVIE / NRKTVEpisodeIE for URLs they also match."""
          return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
                  else super(NRKTVSeasonIE, cls).suitable(url))

      def _real_extract(self, url):
          """Return a playlist of the episodes of the season named in the URL.

          Raises ExtractorError when the page carries no entry for that season.
          """
          display_id = self._match_id(url)

          webpage = self._download_webpage(url, display_id)

          series = self._extract_series(webpage, display_id)

          # _extract_series may return None, and 'seasons' may be absent
          seasons = series.get('seasons') if series else None
          if not isinstance(seasons, list):
              seasons = []

          wanted_number = int(display_id)
          # A default for next() keeps a missing season from leaking
          # StopIteration out of the extractor
          season = next(
              (s for s in seasons
               if isinstance(s, dict) and s.get('seasonNumber') == wanted_number),
              None)
          if season is None:
              raise ExtractorError(
                  'Unable to find season %s' % display_id, video_id=display_id)

          title = try_get(season, lambda x: x['titles']['title'], compat_str)
          return self.playlist_result(
              self._extract_episodes(season), display_id, title)
  class NRKTVSeriesIE(NRKTVSerieBaseIE):
      """Extractor for whole-series pages (``/serie/<name>``), covering both
      the JSON-backed layout and the older HTML season list."""

      _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
      _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
      _TESTS = [
          {
              'url': 'https://tv.nrk.no/serie/blank',
              'info_dict': {
                  'id': 'blank',
                  'title': 'Blank',
                  'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
              },
              'playlist_mincount': 30,
          },
          {
              # new layout, seasons
              'url': 'https://tv.nrk.no/serie/backstage',
              'info_dict': {
                  'id': 'backstage',
                  'title': 'Backstage',
                  'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
              },
              'playlist_mincount': 60,
          },
          {
              # new layout, instalments
              'url': 'https://tv.nrk.no/serie/groenn-glede',
              'info_dict': {
                  'id': 'groenn-glede',
                  'title': 'Grønn glede',
                  'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
              },
              'playlist_mincount': 10,
          },
          {
              # old layout
              'url': 'https://tv.nrksuper.no/serie/labyrint',
              'info_dict': {
                  'id': 'labyrint',
                  'title': 'Labyrint',
                  'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
              },
              'playlist_mincount': 3,
          },
          {
              'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
              'only_matching': True,
          },
          {
              'url': 'https://tv.nrk.no/serie/saving-the-human-race',
              'only_matching': True,
          },
          {
              'url': 'https://tv.nrk.no/serie/postmann-pat',
              'only_matching': True,
          },
      ]

      @classmethod
      def suitable(cls, url):
          """Claim the URL only when no more specific NRK TV extractor does."""
          for more_specific_ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE):
              if more_specific_ie.suitable(url):
                  return False
          return super(NRKTVSeriesIE, cls).suitable(url)

      def _real_extract(self, url):
          """Return a playlist covering every episode or season of the series."""
          series_id = self._match_id(url)

          webpage = self._download_webpage(url, series_id)

          # New layout (e.g. https://tv.nrk.no/serie/backstage)
          series = self._extract_series(webpage, series_id, fatal=False)
          if series:
              title = try_get(series, lambda x: x['titles']['title'], compat_str)
              description = try_get(
                  series, lambda x: x['titles']['subtitle'], compat_str)
              # Each helper returns a list, so concatenation keeps the order
              # seasons, instalments, extra material
              entries = (
                  self._extract_seasons(series.get('seasons'))
                  + self._extract_entries(series.get('instalments'))
                  + self._extract_episodes(series.get('extraMaterial')))
              return self.playlist_result(entries, series_id, title, description)

          # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
          entries = []
          for season_id in re.findall(self._ITEM_RE, webpage):
              season_url = 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
                  series=series_id, season=season_id)
              entries.append(self.url_result(season_url))

          title = self._html_search_meta(
              'seriestitle', webpage, 'title', default=None)
          if not title:
              title = self._og_search_title(webpage, fatal=False)
          if title:
              # Drop the "NRK TV - " / "NRK Super TV - " prefix when present
              title = self._search_regex(
                  r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)

          description = self._html_search_meta(
              'series_description', webpage, 'description', default=None)
          if not description:
              description = self._og_search_description(webpage)

          return self.playlist_result(entries, series_id, title, description)
  class NRKTVDirekteIE(NRKTVIE):
      """Extractor for ``/direkte/<channel>`` pages on tv.nrk.no and
      radio.nrk.no; everything except the URL pattern comes from NRKTVIE."""

      IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
      _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'

      _TESTS = [
          {
              'url': 'https://tv.nrk.no/direkte/nrk1',
              'only_matching': True,
          },
          {
              'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
              'only_matching': True,
          },
      ]
  class NRKPlaylistBaseIE(InfoExtractor):
      """Base for playlist extractors that scrape video ids out of a page.

      Subclasses provide ``_ITEM_RE`` (pattern whose matches are the video
      ids) and ``_extract_title``; ``_extract_description`` is optional.
      """

      def _extract_description(self, webpage):
          """Return the playlist description; this base version has none."""
          return None

      def _real_extract(self, url):
          """Return a playlist with one ``nrk:`` entry per id found in the page."""
          playlist_id = self._match_id(url)

          webpage = self._download_webpage(url, playlist_id)

          entries = []
          for video_id in re.findall(self._ITEM_RE, webpage):
              entries.append(
                  self.url_result('nrk:%s' % video_id, NRKIE.ie_key()))

          playlist_title = self._extract_title(webpage)
          playlist_description = self._extract_description(webpage)

          return self.playlist_result(
              entries, playlist_id, playlist_title, playlist_description)
  class NRKPlaylistIE(NRKPlaylistBaseIE):
      """Extractor for nrk.no pages (outside ``/video`` and ``/skole``) that
      embed videos via ``data-video-id`` attributes."""

      _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
      _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
      _TESTS = [
          {
              'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
              'info_dict': {
                  'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
                  'title': 'Gjenopplev den historiske solformørkelsen',
                  'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
              },
              'playlist_count': 2,
          },
          {
              'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
              'info_dict': {
                  'id': 'rivertonprisen-til-karin-fossum-1.12266449',
                  'title': 'Rivertonprisen til Karin Fossum',
                  'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
              },
              'playlist_count': 2,
          },
      ]

      def _extract_title(self, webpage):
          """Return the Open Graph title of the page (non-fatal lookup)."""
          og_title = self._og_search_title(webpage, fatal=False)
          return og_title

      def _extract_description(self, webpage):
          """Return the Open Graph description of the page."""
          og_description = self._og_search_description(webpage)
          return og_description
  class NRKTVEpisodesIE(NRKPlaylistBaseIE):
      """Extractor for ``/program/Episodes/<series>/<id>`` listing pages; the
      items are the ``data-episode`` programme ids found in the page."""

      _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
      _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
      _TESTS = [
          {
              'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
              'info_dict': {
                  'id': '69031',
                  'title': 'Nytt på nytt, sesong: 201210',
              },
              'playlist_count': 4,
          },
      ]

      def _extract_title(self, webpage):
          """Return the text of the first ``<h1>`` element (non-fatal lookup)."""
          heading = self._html_search_regex(
              r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
          return heading
  class NRKSkoleIE(InfoExtractor):
      """Extractor for NRK Skole URLs that carry a ``mediaId`` query parameter."""

      IE_DESC = 'NRK Skole'
      _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'

      _TESTS = [
          {
              'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
              'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
              'info_dict': {
                  'id': '6021',
                  'ext': 'mp4',
                  'title': 'Genetikk og eneggede tvillinger',
                  'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
                  'duration': 399,
              },
          },
          {
              'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
              'only_matching': True,
          },
      ]

      def _real_extract(self, url):
          """Look up the ``psId`` for the media id and hand it over as ``nrk:<psId>``."""
          video_id = self._match_id(url)

          plugin_url = 'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id
          webpage = self._download_webpage(plugin_url, video_id)

          # The plugin page embeds its state in an application/json script tag
          embedded_json = self._search_regex(
              r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>',
              webpage, 'application json')
          plugin_state = self._parse_json(embedded_json, video_id)
          nrk_id = plugin_state['activeMedia']['psId']

          return self.url_result('nrk:%s' % nrk_id)