rutube.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import itertools
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_str,
  8. compat_parse_qs,
  9. compat_urllib_parse_urlparse,
  10. )
  11. from ..utils import (
  12. determine_ext,
  13. unified_strdate,
  14. try_get,
  15. int_or_none,
  16. )
  17. class RutubeIE(InfoExtractor):
  18. IE_NAME = 'rutube'
  19. IE_DESC = 'Rutube videos'
  20. _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
  21. _TESTS = [{
  22. 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
  23. 'info_dict': {
  24. 'id': '3eac3b4561676c17df9132a9a1e62e3e',
  25. 'ext': 'mp4',
  26. 'title': 'Раненный кенгуру забежал в аптеку',
  27. 'description': 'http://www.ntdtv.ru ',
  28. 'duration': 80,
  29. 'uploader': 'NTDRussian',
  30. 'uploader_id': '29790',
  31. 'upload_date': '20131016',
  32. 'age_limit': 0,
  33. },
  34. 'params': {
  35. # It requires ffmpeg (m3u8 download)
  36. 'skip_download': True,
  37. },
  38. }, {
  39. 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
  40. 'only_matching': True,
  41. }, {
  42. 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
  43. 'only_matching': True,
  44. }, {
  45. 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
  46. 'only_matching': True,
  47. }]
  48. @classmethod
  49. def suitable(cls, url):
  50. parts = compat_urllib_parse_urlparse(url)
  51. params = compat_parse_qs(parts.query)
  52. # see if URL without parameters is OK
  53. res = super(RutubeIE, cls).suitable(url)
  54. if params: # we only allow pl_id parameter in the url
  55. res = res and 'pl_id' in params and len(params) == 1
  56. return res
  57. @staticmethod
  58. def _extract_urls(webpage):
  59. return [mobj.group('url') for mobj in re.finditer(
  60. r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
  61. webpage)]
  62. def _real_extract(self, url):
  63. video_id = self._match_id(url)
  64. video = self._download_json(
  65. 'http://rutube.ru/api/video/%s/?format=json' % video_id,
  66. video_id, 'Downloading video JSON')
  67. # Some videos don't have the author field
  68. author = video.get('author') or {}
  69. options = self._download_json(
  70. 'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
  71. video_id, 'Downloading options JSON')
  72. formats = []
  73. for format_id, format_url in options['video_balancer'].items():
  74. ext = determine_ext(format_url)
  75. if ext == 'm3u8':
  76. formats.extend(self._extract_m3u8_formats(
  77. format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
  78. elif ext == 'f4m':
  79. formats.extend(self._extract_f4m_formats(
  80. format_url, video_id, f4m_id=format_id, fatal=False))
  81. else:
  82. formats.append({
  83. 'url': format_url,
  84. 'format_id': format_id,
  85. })
  86. self._sort_formats(formats)
  87. return {
  88. 'id': video['id'],
  89. 'title': video['title'],
  90. 'description': video['description'],
  91. 'duration': video['duration'],
  92. 'view_count': video['hits'],
  93. 'formats': formats,
  94. 'thumbnail': video['thumbnail_url'],
  95. 'uploader': author.get('name'),
  96. 'uploader_id': compat_str(author['id']) if author else None,
  97. 'upload_date': unified_strdate(video['created_ts']),
  98. 'age_limit': 18 if video['is_adult'] else 0,
  99. }
  100. class RutubeEmbedIE(InfoExtractor):
  101. IE_NAME = 'rutube:embed'
  102. IE_DESC = 'Rutube embedded videos'
  103. _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
  104. _TESTS = [{
  105. 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
  106. 'info_dict': {
  107. 'id': 'a10e53b86e8f349080f718582ce4c661',
  108. 'ext': 'mp4',
  109. 'upload_date': '20131223',
  110. 'uploader_id': '297833',
  111. 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
  112. 'uploader': 'subziro89 ILya',
  113. 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
  114. },
  115. 'params': {
  116. 'skip_download': 'Requires ffmpeg',
  117. },
  118. }, {
  119. 'url': 'http://rutube.ru/play/embed/8083783',
  120. 'only_matching': True,
  121. }]
  122. def _real_extract(self, url):
  123. embed_id = self._match_id(url)
  124. webpage = self._download_webpage(url, embed_id)
  125. canonical_url = self._html_search_regex(
  126. r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
  127. 'Canonical URL')
  128. return self.url_result(canonical_url, 'Rutube')
  129. class RutubeChannelIE(InfoExtractor):
  130. IE_NAME = 'rutube:channel'
  131. IE_DESC = 'Rutube channels'
  132. _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
  133. _TESTS = [{
  134. 'url': 'http://rutube.ru/tags/video/1800/',
  135. 'info_dict': {
  136. 'id': '1800',
  137. },
  138. 'playlist_mincount': 68,
  139. }]
  140. _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
  141. def _extract_videos(self, channel_id, channel_title=None):
  142. entries = []
  143. for pagenum in itertools.count(1):
  144. page = self._download_json(
  145. self._PAGE_TEMPLATE % (channel_id, pagenum),
  146. channel_id, 'Downloading page %s' % pagenum)
  147. results = page['results']
  148. if not results:
  149. break
  150. entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
  151. if not page['has_next']:
  152. break
  153. return self.playlist_result(entries, channel_id, channel_title)
  154. def _real_extract(self, url):
  155. mobj = re.match(self._VALID_URL, url)
  156. channel_id = mobj.group('id')
  157. return self._extract_videos(channel_id)
  158. class RutubeMovieIE(RutubeChannelIE):
  159. IE_NAME = 'rutube:movie'
  160. IE_DESC = 'Rutube movies'
  161. _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
  162. _TESTS = []
  163. _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
  164. _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
  165. def _real_extract(self, url):
  166. movie_id = self._match_id(url)
  167. movie = self._download_json(
  168. self._MOVIE_TEMPLATE % movie_id, movie_id,
  169. 'Downloading movie JSON')
  170. movie_name = movie['name']
  171. return self._extract_videos(movie_id, movie_name)
  172. class RutubePersonIE(RutubeChannelIE):
  173. IE_NAME = 'rutube:person'
  174. IE_DESC = 'Rutube person videos'
  175. _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
  176. _TESTS = [{
  177. 'url': 'http://rutube.ru/video/person/313878/',
  178. 'info_dict': {
  179. 'id': '313878',
  180. },
  181. 'playlist_mincount': 37,
  182. }]
  183. _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
  184. class RutubePlaylistIE(InfoExtractor):
  185. IE_NAME = 'rutube:playlist'
  186. IE_DESC = 'Rutube playlists'
  187. _TESTS = [{
  188. 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
  189. 'info_dict': {
  190. 'id': '4252',
  191. },
  192. 'playlist_count': 25,
  193. }]
  194. _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+)'
  195. _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s'
  196. @staticmethod
  197. def suitable(url):
  198. params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
  199. return params.get('pl_id') and int_or_none(params['pl_id'][0]) \
  200. and params.get('pl_type')
  201. def _real_extract(self, url):
  202. playlist_id = self._match_id(url)
  203. return self._extract_playlist(playlist_id)
  204. def _extract_playlist(self, playlist_id):
  205. entries = []
  206. for pagenum in itertools.count(1):
  207. page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum)
  208. # download_json will sent an accept: application/xml header
  209. page = self._download_json(page_url, playlist_id,
  210. "Downloading metadata for page %s" % pagenum,
  211. headers={'Accept': 'application/json'})
  212. if not page['results']:
  213. break
  214. results = page['results']
  215. for result in results:
  216. entry = self.url_result(result.get('video_url'), 'Rutube')
  217. category = try_get(result, lambda x: x['category']['name'])
  218. entry.update({
  219. 'id': result.get('id'),
  220. 'uploader': try_get(result, lambda x: x['author']['name']),
  221. 'uploader_id': try_get(result, lambda x: x['author']['id']),
  222. 'upload_date': unified_strdate(result.get('created_ts')),
  223. 'title': result.get('title'),
  224. 'description': result.get('description'),
  225. 'thumbnail': result.get('thumbnail_url'),
  226. 'duration': int_or_none(result.get('duration')),
  227. 'category': [category] if category else None,
  228. 'age_limit': 18 if result.get('is_adult') else 0,
  229. 'view_count': int_or_none(result.get('hits')),
  230. 'is_live': result.get('is_livestream'),
  231. 'webpage_url': result.get('video_url'),
  232. })
  233. entries.append(entry)
  234. if page['has_next'] is False:
  235. break
  236. return self.playlist_result(entries, playlist_id, page['name'])