drtv.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. int_or_none,
  7. float_or_none,
  8. mimetype2ext,
  9. parse_iso8601,
  10. remove_end,
  11. update_url_query,
  12. )
  class DRTVIE(InfoExtractor):
      """Extractor for on-demand programmes published on dr.dk.

      Handles the ``tv/se``, ``nyheder`` and ``radio/ondemand`` sections
      matched by ``_VALID_URL``.  The page is scraped only for the programcard
      identifier; formats, subtitles and most metadata come from the
      ``mu/programcard/expanded`` JSON endpoint.
      """

      _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
      _GEO_BYPASS = False
      _GEO_COUNTRIES = ['DK']
      IE_NAME = 'drtv'
      _TESTS = [{
          'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
          'md5': '25e659cccc9a2ed956110a299fdf5983',
          'info_dict': {
              'id': 'klassen-darlig-taber-10',
              'ext': 'mp4',
              'title': 'Klassen - Dårlig taber (10)',
              'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
              'timestamp': 1471991907,
              'upload_date': '20160823',
              'duration': 606.84,
          },
          'params': {
              'skip_download': True,
          },
      }, {
          'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
          'md5': '2c37175c718155930f939ef59952474a',
          'info_dict': {
              'id': 'christiania-pusher-street-ryddes-drdkrjpo',
              'ext': 'mp4',
              'title': 'LIVE Christianias rydning af Pusher Street er i gang',
              'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
              'timestamp': 1472800279,
              'upload_date': '20160902',
              'duration': 131.4,
          },
      }, {
          'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
          'md5': '7c8ca12e6c3d3e3edd59ba5a9b7ca10a',
          'info_dict': {
              'id': 'historien-om-danmark-stenalder',
              'ext': 'mp4',
              'title': 'Historien om Danmark: Stenalder (1)',
              'description': 'Én fascinerende historie om tusindvis af år, hvor vores land bliver skabt ud af is og vand, og hvor de første danskere ankommer til vores egn. Det bliver en rejse ind i urtiden og det liv, som urtidsjægerne har levet i skovene og ved havet og helt frem til bondestenalderen. Gennem skeletfund afslører eksperter, hvordan vores forfædre har set ud i stenalderen og hvorfor stenaldermennesket byggede de imponerende jættestuer, som ligger overalt i det danske.',
              'timestamp': 1490401996,
              'upload_date': '20170325',
              'duration': 3502.04,
          },
      }]

      # Language names used in the programcard's SubtitlesList, mapped to the
      # keys used in the returned ``subtitles`` dict.  Unknown names are used
      # as keys unchanged.
      _SUBTITLE_LANGS = {
          'Danish': 'da',
      }

      def _real_extract(self, url):
          """Build the info dict for a single dr.dk programme page.

          Args:
              url: Page URL matching ``_VALID_URL``.

          Returns:
              A dict with ``id``, ``title``, ``description``, ``thumbnail``,
              ``timestamp``, ``duration``, ``formats`` and ``subtitles``.

          Raises:
              ExtractorError: if the page states that the programme is no
                  longer available.  A geo-restriction error is raised via
                  ``raise_geo_restricted`` when no format could be extracted
                  and at least one resource is flagged as restricted to
                  Denmark.
          """
          video_id = self._match_id(url)

          webpage = self._download_webpage(url, video_id)

          if '>Programmet er ikke længere tilgængeligt' in webpage:
              raise ExtractorError(
                  'Video %s is not available' % video_id, expected=True)

          # From here on the id is the programcard identifier embedded in the
          # page, not the slug taken from the URL.
          video_id = self._search_regex(
              (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
               r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
              webpage, 'video id')

          programcard = self._download_json(
              'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
              video_id, 'Downloading video JSON')
          data = programcard['Data'][0]

          # Prefer the OpenGraph metadata of the page and fall back to the
          # programcard fields.
          title = remove_end(self._og_search_title(
              webpage, default=None), ' | TV | DR') or data['Title']
          description = self._og_search_description(
              webpage, default=None) or data.get('Description')

          timestamp = parse_iso8601(data.get('CreatedTime'))

          thumbnail = None
          duration = None
          restricted_to_denmark = False

          formats = []
          subtitles = {}

          for asset in data['Assets']:
              kind = asset.get('Kind')
              if kind == 'Image':
                  thumbnail = asset.get('Uri')
                  continue
              if kind not in ('VideoResource', 'AudioResource'):
                  continue

              duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
              # Remember the flag once any resource sets it; overwriting it per
              # asset would let a later unrestricted asset hide the restriction.
              if asset.get('RestrictedToDenmark'):
                  restricted_to_denmark = True

              # Spoken-subtitle and sign-language variants get preference -1
              # (presumably so _sort_formats ranks them below the regular
              # stream) and a distinguishing format_id suffix.  Both depend
              # only on the asset, so compute them once per asset.
              asset_target = asset.get('Target')
              preference = 0
              format_suffix = ''
              if asset_target == 'SpokenSubtitles':
                  preference = -1
                  format_suffix = '-spoken-subtitles'
              elif asset_target == 'SignLanguage':
                  preference = -1
                  format_suffix = '-sign-language'

              # ``or []`` also covers an explicit null in the JSON.
              for link in asset.get('Links') or []:
                  uri = link.get('Uri')
                  if not uri:
                      continue
                  target = link.get('Target')
                  format_id = (target or '') + format_suffix
                  if target == 'HDS':
                      # update_url_query merges the parameters into any query
                      # string the URI already has instead of appending a
                      # second '?'.  fatal=False (as in DRTVLiveIE) keeps one
                      # unreachable manifest from aborting the extraction.
                      f4m_formats = self._extract_f4m_formats(
                          update_url_query(uri, {
                              'hdcore': '3.3.0',
                              'plugin': 'aasp-3.3.0.99.43',
                          }),
                          video_id, preference, f4m_id=format_id, fatal=False)
                      if kind == 'AudioResource':
                          for f in f4m_formats:
                              f['vcodec'] = 'none'
                      formats.extend(f4m_formats)
                  elif target == 'HLS':
                      formats.extend(self._extract_m3u8_formats(
                          uri, video_id, 'mp4', entry_protocol='m3u8_native',
                          preference=preference, m3u8_id=format_id,
                          fatal=False))
                  else:
                      # Any other link target is treated as a direct media URL.
                      bitrate = link.get('Bitrate')
                      if bitrate:
                          format_id += '-%s' % bitrate
                      formats.append({
                          'url': uri,
                          'format_id': format_id,
                          'tbr': int_or_none(bitrate),
                          'ext': link.get('FileFormat'),
                          'vcodec': 'none' if kind == 'AudioResource' else None,
                      })

              subtitles_list = asset.get('SubtitlesList')
              if isinstance(subtitles_list, list):
                  for subs in subtitles_list:
                      if not subs.get('Uri'):
                          continue
                      lang = subs.get('Language') or 'da'
                      subtitles.setdefault(
                          self._SUBTITLE_LANGS.get(lang, lang), []).append({
                              'url': subs['Uri'],
                              'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
                          })

          # With non-fatal manifest downloads this check also runs when every
          # manifest request failed, not only when no link was listed.
          if not formats and restricted_to_denmark:
              self.raise_geo_restricted(
                  'Unfortunately, DR is not allowed to show this program outside Denmark.',
                  countries=self._GEO_COUNTRIES)

          self._sort_formats(formats)

          return {
              'id': video_id,
              'title': title,
              'description': description,
              'thumbnail': thumbnail,
              'timestamp': timestamp,
              'duration': duration,
              'formats': formats,
              'subtitles': subtitles,
          }
  class DRTVLiveIE(InfoExtractor):
      """Extractor for the live channels under dr.dk/tv/live/<channel>.

      Channel metadata and the list of streaming servers are read from the
      ``mu-online`` channel API; every listed HLS or HDS stream is turned
      into formats.
      """

      IE_NAME = 'drtv:live'
      _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
      _GEO_COUNTRIES = ['DK']
      _TEST = {
          'url': 'https://www.dr.dk/tv/live/dr1',
          'info_dict': {
              'id': 'dr1',
              'ext': 'mp4',
              'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
          },
          'params': {
              # m3u8 download
              'skip_download': True,
          },
      }

      def _real_extract(self, url):
          """Return the info dict for the live channel addressed by *url*.

          The result carries ``id``, ``title``, ``thumbnail``, ``formats``
          and ``is_live`` set to True.
          """
          channel_id = self._match_id(url)
          info = self._download_json(
              'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
              channel_id)

          live_title = self._live_title(info['Title'])

          collected = []
          for entry in info.get('StreamingServers', []):
              base = entry.get('Server')
              if not base:
                  # Without a server there is nothing to join stream paths to.
                  continue
              protocol = entry.get('LinkType')

              # Lazily walk every stream path of every quality of this server.
              stream_paths = (
                  stream.get('Stream')
                  for quality in entry.get('Qualities', [])
                  for stream in quality.get('Streams', []))

              for path in stream_paths:
                  if not path:
                      continue
                  joined = '%s/%s' % (base, path)
                  hls_url = update_url_query(joined, {'b': ''})
                  if protocol == 'HLS':
                      hls_formats = self._extract_m3u8_formats(
                          hls_url, channel_id, 'mp4',
                          m3u8_id=protocol, fatal=False, live=True)
                      collected.extend(hls_formats)
                  elif protocol == 'HDS':
                      hds_url = update_url_query(joined, {'hdcore': '3.7.0'})
                      hds_formats = self._extract_f4m_formats(
                          hds_url, channel_id, f4m_id=protocol, fatal=False)
                      collected.extend(hds_formats)
                  # Streams with any other link type are ignored.

          self._sort_formats(collected)

          result = {
              'id': channel_id,
              'title': live_title,
              'thumbnail': info.get('PrimaryImageUri'),
              'formats': collected,
              'is_live': True,
          }
          return result