medaltv.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. float_or_none,
  9. int_or_none,
  10. str_or_none,
  11. try_get,
  12. )
  13. class MedalTVIE(InfoExtractor):
  14. _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
  15. _TESTS = [{
  16. 'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
  17. 'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
  18. 'info_dict': {
  19. 'id': '34934644',
  20. 'ext': 'mp4',
  21. 'title': 'Quad Cold',
  22. 'description': 'Medal,https://medal.tv/desktop/',
  23. 'uploader': 'MowgliSB',
  24. 'timestamp': 1603165266,
  25. 'upload_date': '20201020',
  26. 'uploader_id': 10619174,
  27. }
  28. }, {
  29. 'url': 'https://medal.tv/clips/36787208',
  30. 'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
  31. 'info_dict': {
  32. 'id': '36787208',
  33. 'ext': 'mp4',
  34. 'title': 'u tk me i tk u bigger',
  35. 'description': 'Medal,https://medal.tv/desktop/',
  36. 'uploader': 'Mimicc',
  37. 'timestamp': 1605580939,
  38. 'upload_date': '20201117',
  39. 'uploader_id': 5156321,
  40. }
  41. }]
  42. def _real_extract(self, url):
  43. video_id = self._match_id(url)
  44. webpage = self._download_webpage(url, video_id)
  45. hydration_data = self._parse_json(self._search_regex(
  46. r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
  47. webpage, 'hydration data', default='{}'), video_id)
  48. clip = try_get(
  49. hydration_data, lambda x: x['clips'][video_id], dict) or {}
  50. if not clip:
  51. raise ExtractorError(
  52. 'Could not find video information.', video_id=video_id)
  53. title = clip['contentTitle']
  54. source_width = int_or_none(clip.get('sourceWidth'))
  55. source_height = int_or_none(clip.get('sourceHeight'))
  56. aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
  57. def add_item(container, item_url, height, id_key='format_id', item_id=None):
  58. item_id = item_id or '%dp' % height
  59. if item_id not in item_url:
  60. return
  61. width = int(round(aspect_ratio * height))
  62. container.append({
  63. 'url': item_url,
  64. id_key: item_id,
  65. 'width': width,
  66. 'height': height
  67. })
  68. formats = []
  69. thumbnails = []
  70. for k, v in clip.items():
  71. if not (v and isinstance(v, compat_str)):
  72. continue
  73. mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
  74. if not mobj:
  75. continue
  76. prefix = mobj.group(1)
  77. height = int_or_none(mobj.group(2))
  78. if prefix == 'contentUrl':
  79. add_item(
  80. formats, v, height or source_height,
  81. item_id=None if height else 'source')
  82. elif prefix == 'thumbnail':
  83. add_item(thumbnails, v, height, 'id')
  84. error = clip.get('error')
  85. if not formats and error:
  86. if error == 404:
  87. raise ExtractorError(
  88. 'That clip does not exist.',
  89. expected=True, video_id=video_id)
  90. else:
  91. raise ExtractorError(
  92. 'An unknown error occurred ({0}).'.format(error),
  93. video_id=video_id)
  94. self._sort_formats(formats)
  95. # Necessary because the id of the author is not known in advance.
  96. # Won't raise an issue if no profile can be found as this is optional.
  97. author = try_get(
  98. hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
  99. author_id = str_or_none(author.get('id'))
  100. author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
  101. return {
  102. 'id': video_id,
  103. 'title': title,
  104. 'formats': formats,
  105. 'thumbnails': thumbnails,
  106. 'description': clip.get('contentDescription'),
  107. 'uploader': author.get('displayName'),
  108. 'timestamp': float_or_none(clip.get('created'), 1000),
  109. 'uploader_id': author_id,
  110. 'uploader_url': author_url,
  111. 'duration': int_or_none(clip.get('videoLengthSeconds')),
  112. 'view_count': int_or_none(clip.get('views')),
  113. 'like_count': int_or_none(clip.get('likes')),
  114. 'comment_count': int_or_none(clip.get('comments')),
  115. }