googledrive.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. from .common import InfoExtractor
  2. from ..utils import RegexNotFoundError
  3. class GoogleDriveIE(InfoExtractor):
  4. _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
  5. _TEST = {
  6. 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
  7. 'info_dict': {
  8. 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
  9. 'ext': 'mp4',
  10. 'title': 'Big Buck Bunny.mp4',
  11. }
  12. }
  13. _formats = {
  14. '5': {'ext': 'flv'},
  15. '6': {'ext': 'flv'},
  16. '13': {'ext': '3gp'},
  17. '17': {'ext': '3gp'},
  18. '18': {'ext': 'mp4'},
  19. '22': {'ext': 'mp4'},
  20. '34': {'ext': 'flv'},
  21. '35': {'ext': 'flv'},
  22. '36': {'ext': '3gp'},
  23. '37': {'ext': 'mp4'},
  24. '38': {'ext': 'mp4'},
  25. '43': {'ext': 'webm'},
  26. '44': {'ext': 'webm'},
  27. '45': {'ext': 'webm'},
  28. '46': {'ext': 'webm'},
  29. '59': {'ext': 'mp4'}
  30. }
  31. def _real_extract(self, url):
  32. video_id = self._match_id(url)
  33. webpage = self._download_webpage(
  34. 'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape'
  35. )
  36. try:
  37. title = self._html_search_regex(
  38. r'"title","(?P<title>.*?)"',
  39. webpage,
  40. 'title',
  41. group='title'
  42. )
  43. fmt_stream_map = self._html_search_regex(
  44. r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"',
  45. webpage,
  46. 'fmt_stream_map',
  47. group='fmt_stream_map'
  48. )
  49. fmt_list = self._html_search_regex(
  50. r'"fmt_list","(?P<fmt_list>.*?)"',
  51. webpage,
  52. 'fmt_list',
  53. group='fmt_list'
  54. )
  55. # timestamp = self._html_search_regex(
  56. # r'"timestamp","(?P<timestamp>.*?)"',
  57. # webpage,
  58. # 'timestamp',
  59. # group='timestamp'
  60. # )
  61. length_seconds = self._html_search_regex(
  62. r'"length_seconds","(?P<length_seconds>.*?)"',
  63. webpage,
  64. 'length_seconds',
  65. group='length_seconds'
  66. )
  67. except RegexNotFoundError:
  68. try:
  69. reason = self._html_search_regex(
  70. r'"reason","(?P<reason>.*?)"',
  71. webpage,
  72. 'reason',
  73. group='reason'
  74. )
  75. self.report_warning(reason)
  76. return
  77. except RegexNotFoundError:
  78. self.report_warning('not a video')
  79. return
  80. fmt_stream_map = fmt_stream_map.split(',')
  81. fmt_list = fmt_list.split(',')
  82. formats = []
  83. for i in range(len(fmt_stream_map)):
  84. fmt_id, fmt_url = fmt_stream_map[i].split('|')
  85. resolution = fmt_list[i].split('/')[1]
  86. width, height = resolution.split('x')
  87. formats.append({
  88. 'url': fmt_url,
  89. 'format_id': fmt_id,
  90. 'resolution': resolution,
  91. 'width': int(width),
  92. 'height': int(height),
  93. 'ext': self._formats[fmt_id]['ext']
  94. })
  95. self._sort_formats(formats)
  96. return {
  97. 'id': video_id,
  98. 'title': title,
  99. # 'timestamp': int(timestamp),
  100. 'duration': int(length_seconds),
  101. 'formats': formats
  102. }