minds.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (int_or_none, sanitized_Request, str_or_none,
  7. unified_strdate)
  8. class MindsIE(InfoExtractor):
  9. _VALID_URL = r'https?://(?:www\.)?minds\.com/media/(?P<id>[0-9]+)'
  10. _TEST = {
  11. 'url': 'https://www.minds.com/media/100000000000086822',
  12. 'md5': '215a658184a419764852239d4970b045',
  13. 'info_dict': {
  14. 'id': '100000000000086822',
  15. 'ext': 'mp4',
  16. 'title': 'Minds intro sequence',
  17. 'thumbnail': 'https://cdn-cinemr.minds.com/cinemr_com/334128440657580032/thumbnail-00001.png',
  18. 'uploader_id': '100000000000000341',
  19. 'description': '<?xml encoding="utf-8" ?>',
  20. 'upload_date': '20130524',
  21. 'timestamp': 1369404826,
  22. },
  23. 'params': {
  24. 'skip_download': True,
  25. },
  26. }
  27. def _real_extract(self, url):
  28. video_id = self._match_id(url)
  29. video_api_url = 'https://www.minds.com/api/v1/media/%s' % video_id
  30. token = self._get_cookies(url).get('XSRF-TOKEN')
  31. headers = {
  32. 'authority': 'www.minds.com',
  33. 'referer': url,
  34. 'x-xsrf-token': token.value if token else '',
  35. }
  36. data = self._download_json(video_api_url, video_id, headers=headers,
  37. query={'children': 'false'})
  38. formats = []
  39. owner = data.get('ownerObj', {})
  40. transcodes = data.get('transcodes', {})
  41. # These keys are the width so keep the highest width last
  42. keys = sorted(transcodes.keys())
  43. for format_id in keys:
  44. is_numeric = re.match('^[0-9]+\.mp4', format_id)
  45. video_url = transcodes[format_id]
  46. info = {
  47. 'url': video_url,
  48. 'format_id': format_id,
  49. 'http_headers': headers,
  50. }
  51. if is_numeric:
  52. info['width'] = int(format_id.split('.')[0])
  53. formats.append(info)
  54. uploader_id = str_or_none(owner.get('guid') or
  55. data.get('owner_guid') or
  56. owner.get('legacy_guid') or
  57. owner.get('owner_guid'))
  58. description = str_or_none(data.get('description'))
  59. if description:
  60. description = description.strip()
  61. uploader_url = age_limit = thumbnail = None
  62. if owner.get('username'):
  63. uploader_url = 'https://www.minds.com/%s' % owner.get('username')
  64. if data.get('mature') is True:
  65. age_limit = 18
  66. thumbnail_api_url = data.get('thumbnail_src')
  67. if thumbnail_api_url:
  68. req = sanitized_Request(thumbnail_api_url)
  69. req.get_method = lambda: 'HEAD'
  70. res = self._request_webpage(req, video_id)
  71. if res.headers.get('content-type', '').startswith('image/'):
  72. thumbnail = getattr(res, 'url', None)
  73. tags = data.get('tags', '').strip()
  74. if isinstance(tags, compat_str) and tags:
  75. tags = [x.strip() for x in tags.split(',')]
  76. else:
  77. tags = None
  78. category = data.get('category')
  79. if isinstance(category, compat_str) and category:
  80. category = [category]
  81. else:
  82. category = None
  83. return {
  84. 'id': video_id,
  85. 'title': data['title'],
  86. 'formats': formats,
  87. 'description': description,
  88. 'license': str_or_none(data.get('license')),
  89. 'creator': str_or_none(owner.get('name') or owner.get('username')),
  90. 'release_date': unified_strdate(data.get('time_created')),
  91. 'timestamp': int_or_none(data.get('time_created')),
  92. 'uploader_id': uploader_id,
  93. 'uploader_url': uploader_url,
  94. 'view_count': int_or_none(data.get('play:count')),
  95. 'like_count': int_or_none(data.get('thumbs:up:count')),
  96. 'dislike_count': int_or_none(data.get('thumbs:down:count')),
  97. 'average_rating': int_or_none(data.get('rating')),
  98. 'age_limit': age_limit,
  99. 'categories': [str_or_none(data.get('category'))],
  100. 'tags': tags,
  101. # As of 20181020 the API is returning `false` for this value both
  102. # at top level and within the entity.comments:count path. The only
  103. # other way to get this is to fetch all comments and count.
  104. 'comment_count': int_or_none(data.get('comments:count')),
  105. 'thumbnail': thumbnail,
  106. }
  107. class MindsActivityIE(InfoExtractor):
  108. _VALID_URL = r'https?://(?:www\.)?minds\.com/newsfeed/(?P<id>[0-9]+)'
  109. def _real_extract(self, url):
  110. guid = self._match_id(url)
  111. api_url = 'https://www.minds.com/api/v1/newsfeed/single/%s' % guid
  112. token = self._get_cookies(url).get('XSRF-TOKEN')
  113. headers = {
  114. 'authority': 'www.minds.com',
  115. 'referer': url,
  116. 'x-xsrf-token': token.value if token else '',
  117. }
  118. data = self._download_json(api_url, guid, headers=headers)
  119. return self.url_result('https://www.minds.com/media/%s' % data['activity']['entity_guid'])
  120. class MindsChannelIE(InfoExtractor):
  121. _VALID_URL = r'https?://(?:www\.)?minds\.com/(?!newsfeed|media|api)(?P<id>[^/]+)'
  122. def _real_extract(self, url):
  123. channel_name = self._match_id(url)
  124. api_url = 'https://www.minds.com/api/v1/channel/%s' % channel_name
  125. token = self._get_cookies(url).get('XSRF-TOKEN')
  126. headers = {
  127. 'authority': 'www.minds.com',
  128. 'referer': url,
  129. 'x-xsrf-token': token.value if token else '',
  130. }
  131. data = self._download_json(api_url, channel_name, headers=headers)
  132. channel = data.get('channel', {})
  133. params = {'limit': 12, 'offset': ''}
  134. api_url = 'https://www.minds.com/api/v1/newsfeed/personal/%s' % channel['guid']
  135. entries = []
  136. while True:
  137. data = self._download_json(api_url, channel['guid'],
  138. headers=headers, query=params)
  139. activity = data.get('activity', [])
  140. if len(activity) == 0 or not data.get('load-next'):
  141. break
  142. for info in activity:
  143. if info.get('custom_type') != 'video':
  144. continue
  145. entries.append(self.url_result('https://www.minds.com/media/%s' % info['entity_guid']))
  146. params['offset'] = data['load-next']
  147. return self.playlist_result(entries,
  148. playlist_title='%s activity' % channel_name)