limelight.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_HTTPError
  6. from ..utils import (
  7. determine_ext,
  8. float_or_none,
  9. int_or_none,
  10. unsmuggle_url,
  11. ExtractorError,
  12. )
  13. class LimelightBaseIE(InfoExtractor):
  14. _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
  15. _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
  16. def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
  17. headers = {}
  18. if referer:
  19. headers['Referer'] = referer
  20. try:
  21. return self._download_json(
  22. self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
  23. item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
  24. except ExtractorError as e:
  25. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
  26. error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
  27. if error == 'CountryDisabled':
  28. self.raise_geo_restricted()
  29. raise ExtractorError(error, expected=True)
  30. raise
  31. def _call_api(self, organization_id, item_id, method):
  32. return self._download_json(
  33. self._API_URL % (organization_id, self._API_PATH, item_id, method),
  34. item_id, 'Downloading API %s JSON' % method)
  35. def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
  36. pc = self._call_playlist_service(item_id, pc_method, referer=referer)
  37. metadata = self._call_api(pc['orgId'], item_id, meta_method)
  38. mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
  39. return pc, mobile, metadata
  40. def _extract_info(self, streams, mobile_urls, properties):
  41. video_id = properties['media_id']
  42. formats = []
  43. urls = []
  44. for stream in streams:
  45. stream_url = stream.get('url')
  46. if not stream_url or stream.get('drmProtected') or stream_url in urls:
  47. continue
  48. urls.append(stream_url)
  49. ext = determine_ext(stream_url)
  50. if ext == 'f4m':
  51. formats.extend(self._extract_f4m_formats(
  52. stream_url, video_id, f4m_id='hds', fatal=False))
  53. else:
  54. fmt = {
  55. 'url': stream_url,
  56. 'abr': float_or_none(stream.get('audioBitRate')),
  57. 'vbr': float_or_none(stream.get('videoBitRate')),
  58. 'fps': float_or_none(stream.get('videoFrameRate')),
  59. 'width': int_or_none(stream.get('videoWidthInPixels')),
  60. 'height': int_or_none(stream.get('videoHeightInPixels')),
  61. 'ext': ext,
  62. }
  63. rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
  64. if rtmp:
  65. format_id = 'rtmp'
  66. if stream.get('videoBitRate'):
  67. format_id += '-%d' % int_or_none(stream['videoBitRate'])
  68. http_format_id = format_id.replace('rtmp', 'http')
  69. CDN_HOSTS = (
  70. ('delvenetworks.com', 'cpl.delvenetworks.com'),
  71. ('video.llnw.net', 's2.content.video.llnw.net'),
  72. )
  73. for cdn_host, http_host in CDN_HOSTS:
  74. if cdn_host not in rtmp.group('host').lower():
  75. continue
  76. http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
  77. urls.append(http_url)
  78. if self._is_valid_url(http_url, video_id, http_format_id):
  79. http_fmt = fmt.copy()
  80. http_fmt.update({
  81. 'url': http_url,
  82. 'format_id': http_format_id,
  83. })
  84. formats.append(http_fmt)
  85. break
  86. fmt.update({
  87. 'url': rtmp.group('url'),
  88. 'play_path': rtmp.group('playpath'),
  89. 'app': rtmp.group('app'),
  90. 'ext': 'flv',
  91. 'format_id': format_id,
  92. })
  93. formats.append(fmt)
  94. for mobile_url in mobile_urls:
  95. media_url = mobile_url.get('mobileUrl')
  96. format_id = mobile_url.get('targetMediaPlatform')
  97. if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
  98. continue
  99. urls.append(media_url)
  100. ext = determine_ext(media_url)
  101. if ext == 'm3u8':
  102. formats.extend(self._extract_m3u8_formats(
  103. media_url, video_id, 'mp4', 'm3u8_native',
  104. m3u8_id=format_id, fatal=False))
  105. elif ext == 'f4m':
  106. formats.extend(self._extract_f4m_formats(
  107. stream_url, video_id, f4m_id=format_id, fatal=False))
  108. else:
  109. formats.append({
  110. 'url': media_url,
  111. 'format_id': format_id,
  112. 'preference': -1,
  113. 'ext': ext,
  114. })
  115. self._sort_formats(formats)
  116. title = properties['title']
  117. description = properties.get('description')
  118. timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
  119. duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
  120. filesize = int_or_none(properties.get('total_storage_in_bytes'))
  121. categories = [properties.get('category')]
  122. tags = properties.get('tags', [])
  123. thumbnails = [{
  124. 'url': thumbnail['url'],
  125. 'width': int_or_none(thumbnail.get('width')),
  126. 'height': int_or_none(thumbnail.get('height')),
  127. } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
  128. subtitles = {}
  129. for caption in properties.get('captions', []):
  130. lang = caption.get('language_code')
  131. subtitles_url = caption.get('url')
  132. if lang and subtitles_url:
  133. subtitles.setdefault(lang, []).append({
  134. 'url': subtitles_url,
  135. })
  136. closed_captions_url = properties.get('closed_captions_url')
  137. if closed_captions_url:
  138. subtitles.setdefault('en', []).append({
  139. 'url': closed_captions_url,
  140. 'ext': 'ttml',
  141. })
  142. return {
  143. 'id': video_id,
  144. 'title': title,
  145. 'description': description,
  146. 'formats': formats,
  147. 'timestamp': timestamp,
  148. 'duration': duration,
  149. 'filesize': filesize,
  150. 'categories': categories,
  151. 'tags': tags,
  152. 'thumbnails': thumbnails,
  153. 'subtitles': subtitles,
  154. }
  155. class LimelightMediaIE(LimelightBaseIE):
  156. IE_NAME = 'limelight'
  157. _VALID_URL = r'''(?x)
  158. (?:
  159. limelight:media:|
  160. https?://
  161. (?:
  162. link\.videoplatform\.limelight\.com/media/|
  163. assets\.delvenetworks\.com/player/loader\.swf
  164. )
  165. \?.*?\bmediaId=
  166. )
  167. (?P<id>[a-z0-9]{32})
  168. '''
  169. _TESTS = [{
  170. 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
  171. 'info_dict': {
  172. 'id': '3ffd040b522b4485b6d84effc750cd86',
  173. 'ext': 'mp4',
  174. 'title': 'HaP and the HB Prince Trailer',
  175. 'description': 'md5:8005b944181778e313d95c1237ddb640',
  176. 'thumbnail': r're:^https?://.*\.jpeg$',
  177. 'duration': 144.23,
  178. 'timestamp': 1244136834,
  179. 'upload_date': '20090604',
  180. },
  181. 'params': {
  182. # m3u8 download
  183. 'skip_download': True,
  184. },
  185. }, {
  186. # video with subtitles
  187. 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
  188. 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
  189. 'info_dict': {
  190. 'id': 'a3e00274d4564ec4a9b29b9466432335',
  191. 'ext': 'mp4',
  192. 'title': '3Play Media Overview Video',
  193. 'thumbnail': r're:^https?://.*\.jpeg$',
  194. 'duration': 78.101,
  195. 'timestamp': 1338929955,
  196. 'upload_date': '20120605',
  197. 'subtitles': 'mincount:9',
  198. },
  199. }, {
  200. 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
  201. 'only_matching': True,
  202. }]
  203. _PLAYLIST_SERVICE_PATH = 'media'
  204. _API_PATH = 'media'
  205. def _real_extract(self, url):
  206. url, smuggled_data = unsmuggle_url(url, {})
  207. video_id = self._match_id(url)
  208. self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
  209. pc, mobile, metadata = self._extract(
  210. video_id, 'getPlaylistByMediaId',
  211. 'getMobilePlaylistByMediaId', 'properties',
  212. smuggled_data.get('source_url'))
  213. return self._extract_info(
  214. pc['playlistItems'][0].get('streams', []),
  215. mobile['mediaList'][0].get('mobileUrls', []) if mobile else [],
  216. metadata)
  217. class LimelightChannelIE(LimelightBaseIE):
  218. IE_NAME = 'limelight:channel'
  219. _VALID_URL = r'''(?x)
  220. (?:
  221. limelight:channel:|
  222. https?://
  223. (?:
  224. link\.videoplatform\.limelight\.com/media/|
  225. assets\.delvenetworks\.com/player/loader\.swf
  226. )
  227. \?.*?\bchannelId=
  228. )
  229. (?P<id>[a-z0-9]{32})
  230. '''
  231. _TESTS = [{
  232. 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
  233. 'info_dict': {
  234. 'id': 'ab6a524c379342f9b23642917020c082',
  235. 'title': 'Javascript Sample Code',
  236. },
  237. 'playlist_mincount': 3,
  238. }, {
  239. 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
  240. 'only_matching': True,
  241. }]
  242. _PLAYLIST_SERVICE_PATH = 'channel'
  243. _API_PATH = 'channels'
  244. def _real_extract(self, url):
  245. url, smuggled_data = unsmuggle_url(url, {})
  246. channel_id = self._match_id(url)
  247. pc, mobile, medias = self._extract(
  248. channel_id, 'getPlaylistByChannelId',
  249. 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
  250. 'media', smuggled_data.get('source_url'))
  251. entries = [
  252. self._extract_info(
  253. pc['playlistItems'][i].get('streams', []),
  254. mobile['mediaList'][i].get('mobileUrls', []) if mobile else [],
  255. medias['media_list'][i])
  256. for i in range(len(medias['media_list']))]
  257. return self.playlist_result(entries, channel_id, pc['title'])
  258. class LimelightChannelListIE(LimelightBaseIE):
  259. IE_NAME = 'limelight:channel_list'
  260. _VALID_URL = r'''(?x)
  261. (?:
  262. limelight:channel_list:|
  263. https?://
  264. (?:
  265. link\.videoplatform\.limelight\.com/media/|
  266. assets\.delvenetworks\.com/player/loader\.swf
  267. )
  268. \?.*?\bchannelListId=
  269. )
  270. (?P<id>[a-z0-9]{32})
  271. '''
  272. _TESTS = [{
  273. 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
  274. 'info_dict': {
  275. 'id': '301b117890c4465c8179ede21fd92e2b',
  276. 'title': 'Website - Hero Player',
  277. },
  278. 'playlist_mincount': 2,
  279. }, {
  280. 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
  281. 'only_matching': True,
  282. }]
  283. _PLAYLIST_SERVICE_PATH = 'channel_list'
  284. def _real_extract(self, url):
  285. channel_list_id = self._match_id(url)
  286. channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
  287. entries = [
  288. self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
  289. for channel in channel_list['channelList']]
  290. return self.playlist_result(entries, channel_list_id, channel_list['title'])