# arkenaplay.py
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. int_or_none,
  6. parse_iso8601
  7. )
  8. import re
  9. class ArkenaPlayIE(InfoExtractor):
  10. IE_NAME = 'ArkenaPlay'
  11. _VALID_URL = r'(?P<shortcut>arkena:(?P<version>[0-9]+):(?P<mediatype>[A-Za-z0-9]+):(?P<mediaId>[^:]+):(?P<widgetsettingId>[A-Za-z0-9]+):(?P<accountId>[A-Za-z0-9]+))|(?:(?P<host>https?://(?:www\.)?play\..*\..*)/embed/(?:avp/v[0-9]+/player/[A-Za-z0-9]+/)?(?P<id>.*)?)'
  12. _TESTS = [{
  13. 'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
  14. 'md5': '6cea4f7d13810464ef8485a924fc3333',
  15. 'info_dict': {
  16. 'id': '327336',
  17. 'url': 're:http://httpod.scdn.arkena.com/11970/327336.*',
  18. 'ext': 'mp4',
  19. 'title': '327336',
  20. 'upload_date': '20160225',
  21. 'timestamp': 1456391602
  22. }
  23. }, {
  24. # Shortcut for: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
  25. 'url': 'arkena:2:media:b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe:1:129411',
  26. 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
  27. 'info_dict': {
  28. 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
  29. 'url': 'http://88e04ec095b07cd1aa3ea588be47e870.httpcache0.90034-httpcache0.dna.qbrick.com/90034-httpcache0/4bf759a1-00090034/bbb_sunflower_2160p_60fps_normal_720p.mp4',
  30. 'ext': 'mp4',
  31. 'title': 'Big Buck Bunny',
  32. 'description': 'Royalty free test video',
  33. 'upload_date': '20150528',
  34. 'timestamp': 1432816365
  35. }
  36. }]
  37. def _real_extract(self, url):
  38. mobj = re.match(self._VALID_URL, url)
  39. if mobj.group('shortcut'):
  40. version = mobj.group('version')
  41. mediatype = mobj.group('mediatype')
  42. mediaid = mobj.group('mediaId')
  43. widgetsettingid = mobj.group('widgetsettingId')
  44. accountid = mobj.group('accountId')
  45. display_id = '{0}:{1}:{2}:{3}'.format(mediatype, mediaid, widgetsettingid, accountid)
  46. media_url = 'https://play.arkena.com/config/avp/v{0}/player/{1}/{2}/{3}/{4}/?callbackMethod=?'.format(
  47. version, mediatype, mediaid, widgetsettingid, accountid)
  48. else:
  49. display_id = self._search_regex(self._VALID_URL, url, 'host_name', group='id')
  50. webpage = self._download_webpage(url, display_id)
  51. media_url_regex = '"(?P<mediainfo>(?P<host>.*)/(c|C)onfig/.*\?callbackMethod=\?)"'
  52. media_url = self._html_search_regex(media_url_regex, webpage, 'arkena_media_info_url')
  53. hostname = self._html_search_regex(media_url_regex, webpage, 'arkena_media_host', group='host')
  54. if not hostname:
  55. hostname = self._search_regex(self._VALID_URL, url, 'host_name', group='host')
  56. media_url = hostname + media_url
  57. # Extract the required info of the media files gathered in a dictionary
  58. arkena_info = self._download_webpage(media_url, 'arkena_info_')
  59. arkena_info_regex = r'\?\((?P<json>.*)\);'
  60. media_dict = self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'),
  61. display_id)
  62. # All videos are part of a playlist, a single video is also put in a playlist
  63. playlist_items = media_dict.get('Playlist', [])
  64. if len(playlist_items) == 0:
  65. return self.url_result(url, 'Generic')
  66. elif len(playlist_items) == 1:
  67. arkena_media_info = playlist_items[0]
  68. return self.__extract_from_playlistentry(arkena_media_info)
  69. else:
  70. entries_info = []
  71. for arkena_playlist_item in playlist_items:
  72. entries_info.append(self.__extract_from_playlistentry(arkena_playlist_item))
  73. return {
  74. 'id': display_id,
  75. 'entries': entries_info
  76. }
  77. def __extract_from_playlistentry(self, arkena_playlistentry_info):
  78. media_info = arkena_playlistentry_info.get('MediaInfo', {})
  79. thumbnails = self.__get_thumbnails(media_info)
  80. title = media_info.get('Title')
  81. description = media_info.get('Description')
  82. video_id = media_info.get('VideoId')
  83. timestamp = parse_iso8601(media_info.get('PublishDate'))
  84. formats = self.__get_video_formats(arkena_playlistentry_info, video_id)
  85. return {
  86. 'id': video_id,
  87. 'title': title,
  88. 'formats': formats,
  89. 'thumbnails': thumbnails,
  90. 'description': description,
  91. 'timestamp': timestamp
  92. }
  93. def __get_thumbnails(self, arkena_mediainfo):
  94. thumbnails = []
  95. thumbnails_info = arkena_mediainfo.get('Poster')
  96. if not thumbnails_info:
  97. return None
  98. for thumbnail in thumbnails_info:
  99. thumbnail_url = thumbnail.get('Url')
  100. if not thumbnail_url:
  101. continue
  102. thumbnails.append({
  103. 'url': thumbnail_url,
  104. 'width': int_or_none(thumbnail.get('Size'))
  105. })
  106. return thumbnails
  107. def __get_video_formats(self, media_files_info, video_id):
  108. formats = []
  109. media_files = media_files_info.get('MediaFiles')
  110. if not media_files:
  111. return None
  112. for type_name, video_files_json in media_files.iteritems():
  113. for video_info in video_files_json:
  114. video_url = video_info.get('Url')
  115. if not video_url:
  116. continue
  117. type = video_info.get('Type')
  118. if type_name in ['Mp4', 'WebM', 'Flash']:
  119. bitrate = int_or_none(video_info.get('Bitrate'), scale=1000)
  120. ext = None
  121. if type == 'video/mp4':
  122. ext = 'mp4'
  123. elif type == 'video/webm':
  124. ext = 'webm'
  125. elif type == 'video/x-flv':
  126. ext = 'flv'
  127. formats.append({
  128. 'url': video_url,
  129. 'ext': ext,
  130. 'tbr': bitrate
  131. })
  132. elif type_name == 'M3u8' and type == 'application/x-mpegURL':
  133. formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
  134. elif type_name == 'Flash' and type == 'application/hds+xml':
  135. formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
  136. elif type_name == 'Dash' and type == 'application/dash+xml':
  137. formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False))
  138. self._sort_formats(formats)
  139. return formats