yandexdisk.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..compat import compat_str
  5. from ..utils import (
  6. determine_ext,
  7. float_or_none,
  8. int_or_none,
  9. try_get,
  10. urlencode_postdata,
  11. )
  class YandexDiskIE(InfoExtractor):
      """Extractor for videos shared through ``yadi.sk/i/<id>`` links."""

      # The path component after /i/ is used as the video id.
      _VALID_URL = r'https?://yadi\.sk/i/(?P<id>[^/?#&]+)'
      _TEST = {
          'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
          'md5': '33955d7ae052f15853dc41f35f17581c',
          'info_dict': {
              'id': 'VdOeDou8eZs6Y',
              'ext': 'mp4',
              'title': '4.mp4',
              'duration': 168.6,
              'uploader': 'y.botova',
              'uploader_id': '300043621',
              'view_count': int,
          },
      }

      def _real_extract(self, url):
          """Build the info dict for one shared video.

          The extraction runs in three requests:

          1. ``auth/status`` is fetched and the ``sk`` (or ``skExternal``)
             value is pulled out of the response text.
          2. The share page itself is fetched; the entry whose ``model`` is
             ``'resource'`` inside the embedded ``models-client`` JSON array
             supplies the file id, the title and the uploader/view metadata.
          3. ``models/`` is queried for the ``videoInfo`` and
             ``do-get-resource-url`` models, which supply the transcoded
             streams, the duration and the original file URL.

          url -- share URL matching ``_VALID_URL``.

          Returns a dict with ``id``, ``title``, ``duration``, ``uploader``,
          ``uploader_id``, ``view_count`` and ``formats``.

          Raises ExtractorError when the share page carries no usable
          ``'resource'`` model. A missing ``id``/``name`` inside that model
          or a missing ``models`` key in the API response still raises
          KeyError, as before.
          """
          # Imported here because ExtractorError is not among the names this
          # module pulls from ..utils at file level.
          from ..utils import ExtractorError

          video_id = self._match_id(url)

          status = self._download_webpage(
              'https://disk.yandex.com/auth/status', video_id, query={
                  'urlOrigin': url,
                  'source': 'public',
                  'md5': 'false',
              })
          # Accepts either quote style around both the key and the value.
          # NOTE(review): sk is presumably a session/CSRF key -- confirm.
          sk = self._search_regex(
              r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
              status, 'sk', group='value')

          webpage = self._download_webpage(url, video_id)

          page_models = self._parse_json(
              self._search_regex(
                  r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
                  webpage, 'video JSON'),
              video_id)

          # A default is passed to next() so that a page without a 'resource'
          # model yields a clear extractor error instead of leaking a bare
          # StopIteration; non-dict entries are skipped rather than crashing
          # on .get().
          data = next(
              (model.get('data') for model in page_models
               if isinstance(model, dict) and model.get('model') == 'resource'),
              None)
          if not isinstance(data, dict):
              raise ExtractorError(
                  'Unable to find resource model', video_id=video_id)

          # Mandatory fields: nothing useful can be returned without them.
          video_hash = data['id']
          title = data['name']

          # Kept under its own name so it is not confused with the JSON
          # embedded in the share page.
          api_models = self._download_json(
              'https://disk.yandex.com/models/', video_id,
              data=urlencode_postdata({
                  '_model.0': 'videoInfo',
                  'id.0': video_hash,
                  '_model.1': 'do-get-resource-url',
                  'id.1': video_hash,
                  'version': '13.6',
                  'sk': sk,
              }), query={'_m': 'videoInfo'})['models']

          # Index 0 / 1 mirror the order of _model.0 / _model.1 in the request.
          videos = try_get(
              api_models, lambda x: x[0]['data']['videos'], list) or []
          source_url = try_get(
              api_models, lambda x: x[1]['data']['file'], compat_str)

          formats = []
          if source_url:
              # Original upload; its extension is taken from the file name.
              formats.append({
                  'url': source_url,
                  'format_id': 'source',
                  'ext': determine_ext(title, 'mp4'),
                  'quality': 1,
              })
          for video in videos:
              if not isinstance(video, dict):
                  continue
              format_url = video.get('url')
              if not format_url:
                  continue
              if determine_ext(format_url) == 'm3u8':
                  formats.extend(self._extract_m3u8_formats(
                      format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                      m3u8_id='hls', fatal=False))
              else:
                  formats.append({
                      'url': format_url,
                  })
          self._sort_formats(formats)

          # The raw value is scaled by 1000 (presumably milliseconds to
          # seconds; the test above expects 168.6).
          duration = float_or_none(try_get(
              api_models, lambda x: x[0]['data']['duration']), 1000)
          uploader = try_get(
              data, lambda x: x['user']['display_name'], compat_str)
          uploader_id = try_get(
              data, lambda x: x['user']['uid'], compat_str)
          view_count = int_or_none(try_get(
              data, lambda x: x['meta']['views_counter']))

          return {
              'id': video_id,
              'title': title,
              'duration': duration,
              'uploader': uploader,
              'uploader_id': uploader_id,
              'view_count': view_count,
              'formats': formats,
          }