picarto.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import time
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. js_to_json,
  9. update_url_query,
  10. urlencode_postdata,
  11. )
  12. class PicartoIE(InfoExtractor):
  13. _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
  14. _TEST = {
  15. 'url': 'https://picarto.tv/Setz',
  16. 'info_dict': {
  17. 'id': 'Setz',
  18. 'ext': 'mp4',
  19. 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  20. 'timestamp': int,
  21. 'is_live': True
  22. },
  23. 'skip': 'Stream is offline',
  24. }
  25. @classmethod
  26. def suitable(cls, url):
  27. return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
  28. def _real_extract(self, url):
  29. channel_id = self._match_id(url)
  30. metadata = self._download_json(
  31. 'https://api.picarto.tv/v1/channel/name/' + channel_id,
  32. channel_id)
  33. if metadata.get('online') is False:
  34. raise ExtractorError('Stream is offline', expected=True)
  35. cdn_data = self._download_json(
  36. 'https://picarto.tv/process/channel', channel_id,
  37. data=urlencode_postdata({'loadbalancinginfo': channel_id}),
  38. note='Downloading load balancing info')
  39. token = self._VALID_URL_RE.match(url).group('token') or 'public'
  40. params = {
  41. 'con': int(time.time() * 1000),
  42. 'token': token,
  43. }
  44. prefered_edge = cdn_data.get('preferedEdge')
  45. formats = []
  46. for edge in cdn_data['edges']:
  47. edge_ep = edge.get('ep')
  48. if not edge_ep or not isinstance(edge_ep, compat_str):
  49. continue
  50. edge_id = edge.get('id')
  51. for tech in cdn_data['techs']:
  52. tech_label = tech.get('label')
  53. tech_type = tech.get('type')
  54. preference = 0
  55. if edge_id == prefered_edge:
  56. preference += 1
  57. format_id = []
  58. if edge_id:
  59. format_id.append(edge_id)
  60. if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
  61. format_id.append('hls')
  62. formats.extend(self._extract_m3u8_formats(
  63. update_url_query(
  64. 'https://%s/hls/%s/index.m3u8'
  65. % (edge_ep, channel_id), params),
  66. channel_id, 'mp4', preference=preference,
  67. m3u8_id='-'.join(format_id), fatal=False))
  68. continue
  69. elif tech_type == 'video/mp4' or tech_label == 'MP4':
  70. format_id.append('mp4')
  71. formats.append({
  72. 'url': update_url_query(
  73. 'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
  74. params),
  75. 'format_id': '-'.join(format_id),
  76. 'preference': preference,
  77. })
  78. else:
  79. # rtmp format does not seem to work
  80. continue
  81. self._sort_formats(formats)
  82. mature = metadata.get('adult')
  83. if mature is None:
  84. age_limit = None
  85. else:
  86. age_limit = 18 if mature is True else 0
  87. return {
  88. 'id': channel_id,
  89. 'title': self._live_title(channel_id),
  90. 'is_live': True,
  91. 'thumbnail': metadata.get('thumbnails', {}).get('web'),
  92. 'age_limit': age_limit,
  93. 'formats': formats,
  94. }
  95. class PicartoVodIE(InfoExtractor):
  96. _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
  97. _TESTS = [{
  98. 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
  99. 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
  100. 'info_dict': {
  101. 'id': 'ArtofZod_2017.12.12.00.13.23.flv',
  102. 'ext': 'mp4',
  103. 'title': 'ArtofZod_2017.12.12.00.13.23.flv',
  104. 'thumbnail': r're:^https?://.*\.jpg'
  105. },
  106. }, {
  107. 'url': 'https://picarto.tv/videopopout/Plague',
  108. 'only_matching': True,
  109. }]
  110. def _real_extract(self, url):
  111. video_id = self._match_id(url)
  112. webpage = self._download_webpage(url, video_id)
  113. vod_info = self._parse_json(
  114. self._search_regex(
  115. r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
  116. video_id),
  117. video_id, transform_source=js_to_json)
  118. formats = self._extract_m3u8_formats(
  119. vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
  120. m3u8_id='hls')
  121. self._sort_formats(formats)
  122. return {
  123. 'id': video_id,
  124. 'title': video_id,
  125. 'thumbnail': vod_info.get('vodThumb'),
  126. 'formats': formats,
  127. }