vk.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. compat_urllib_request,
  9. compat_urllib_parse,
  10. compat_str,
  11. unescapeHTML,
  12. unified_strdate)
  13. class VKIE(InfoExtractor):
  14. IE_NAME = 'vk.com'
  15. _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
  16. _NETRC_MACHINE = 'vk'
  17. _TESTS = [
  18. {
  19. 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
  20. 'md5': '0deae91935c54e00003c2a00646315f0',
  21. 'info_dict': {
  22. 'id': '162222515',
  23. 'ext': 'flv',
  24. 'title': 'ProtivoGunz - Хуёвая песня',
  25. 'uploader': 're:Noize MC.*',
  26. 'duration': 195,
  27. 'upload_date': '20120212',
  28. },
  29. },
  30. {
  31. 'url': 'http://vk.com/video205387401_165548505',
  32. 'md5': '6c0aeb2e90396ba97035b9cbde548700',
  33. 'info_dict': {
  34. 'id': '165548505',
  35. 'ext': 'mp4',
  36. 'uploader': 'Tom Cruise',
  37. 'title': 'No name',
  38. 'duration': 9,
  39. 'upload_date': '20130721'
  40. }
  41. },
  42. {
  43. 'note': 'Embedded video',
  44. 'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
  45. 'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
  46. 'info_dict': {
  47. 'id': '162925554',
  48. 'ext': 'mp4',
  49. 'uploader': 'Vladimir Gavrin',
  50. 'title': 'Lin Dan',
  51. 'duration': 101,
  52. 'upload_date': '20120730',
  53. }
  54. },
  55. {
  56. # VIDEO NOW REMOVED
  57. # please update if you find a video whose URL follows the same pattern
  58. 'url': 'http://vk.com/video-8871596_164049491',
  59. 'md5': 'a590bcaf3d543576c9bd162812387666',
  60. 'note': 'Only available for registered users',
  61. 'info_dict': {
  62. 'id': '164049491',
  63. 'ext': 'mp4',
  64. 'uploader': 'Триллеры',
  65. 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
  66. 'duration': 8352,
  67. 'upload_date': '20121218'
  68. },
  69. 'skip': 'Requires vk account credentials',
  70. },
  71. {
  72. # VIDEO NOW REMOVED
  73. # please update if you find a video whose URL follows the same pattern
  74. 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
  75. 'md5': 'd82c22e449f036282d1d3f7f4d276869',
  76. 'info_dict': {
  77. 'id': '166094326',
  78. 'ext': 'mp4',
  79. 'uploader': 'Киномания - лучшее из мира кино',
  80. 'title': 'Запах женщины (1992)',
  81. 'duration': 9392,
  82. 'upload_date': '20130914'
  83. },
  84. 'skip': 'Requires vk account credentials',
  85. },
  86. {
  87. 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
  88. 'md5': '4d7a5ef8cf114dfa09577e57b2993202',
  89. 'info_dict': {
  90. 'id': '168067957',
  91. 'ext': 'mp4',
  92. 'uploader': 'Киномания - лучшее из мира кино',
  93. 'title': ' ',
  94. 'duration': 7291,
  95. 'upload_date': '20140328',
  96. },
  97. 'skip': 'Requires vk account credentials',
  98. },
  99. {
  100. 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
  101. 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
  102. 'note': 'ivi.ru embed',
  103. 'info_dict': {
  104. 'id': '60690',
  105. 'ext': 'mp4',
  106. 'title': 'Книга Илая',
  107. 'duration': 6771,
  108. 'upload_date': '20140626',
  109. },
  110. 'skip': 'Only works from Russia',
  111. },
  112. ]
  113. def _login(self):
  114. (username, password) = self._get_login_info()
  115. if username is None:
  116. return
  117. login_form = {
  118. 'act': 'login',
  119. 'role': 'al_frame',
  120. 'expire': '1',
  121. 'email': username,
  122. 'pass': password,
  123. }
  124. request = compat_urllib_request.Request('https://login.vk.com/?act=login',
  125. compat_urllib_parse.urlencode(login_form).encode('utf-8'))
  126. login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
  127. if re.search(r'onLoginFailed', login_page):
  128. raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
  129. def _real_initialize(self):
  130. self._login()
  131. def _real_extract(self, url):
  132. mobj = re.match(self._VALID_URL, url)
  133. video_id = mobj.group('videoid')
  134. if not video_id:
  135. video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
  136. info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
  137. info_page = self._download_webpage(info_url, video_id)
  138. ERRORS = {
  139. r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
  140. 'Video %s has been removed from public access due to rightholder complaint.',
  141. r'<!>Please log in or <':
  142. 'Video %s is only available for registered users, '
  143. 'use --username and --password options to provide account credentials.',
  144. '<!>Unknown error':
  145. 'Video %s does not exist.'
  146. }
  147. for error_re, error_msg in ERRORS.items():
  148. if re.search(error_re, info_page):
  149. raise ExtractorError(error_msg % video_id, expected=True)
  150. m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
  151. if m_yt is not None:
  152. self.to_screen('Youtube video detected')
  153. return self.url_result(m_yt.group(1), 'Youtube')
  154. m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
  155. if m_opts:
  156. m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
  157. if m_opts_url:
  158. opts_url = m_opts_url.group(1)
  159. if opts_url.startswith('//'):
  160. opts_url = 'http:' + opts_url
  161. return self.url_result(opts_url)
  162. data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
  163. data = json.loads(data_json)
  164. # Extract upload date
  165. upload_date = None
  166. mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
  167. if mobj is not None:
  168. x = mobj.group(1) + ' ' + mobj.group(2)
  169. upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
  170. formats = [{
  171. 'format_id': k,
  172. 'url': v,
  173. 'width': int(k[len('url'):]),
  174. } for k, v in data.items()
  175. if k.startswith('url')]
  176. self._sort_formats(formats)
  177. return {
  178. 'id': compat_str(data['vid']),
  179. 'formats': formats,
  180. 'title': unescapeHTML(data['md_title']),
  181. 'thumbnail': data.get('jpg'),
  182. 'uploader': data.get('md_author'),
  183. 'duration': data.get('duration'),
  184. 'upload_date': upload_date,
  185. }
  186. class VKUserVideosIE(InfoExtractor):
  187. IE_NAME = 'vk.com:user-videos'
  188. IE_DESC = 'vk.com:All of a user\'s videos'
  189. _VALID_URL = r'https?://(?:m\.)?vk\.com/videos([0-9]+)(?:m\?.*)?'
  190. _TEMPLATE_URL = 'https://vk.com/videos'
  191. _TEST = {
  192. 'url': 'http://vk.com/videos205387401',
  193. 'playlist_mincount': 4,
  194. }
  195. def extract_videos_from_page(self, page):
  196. ids_in_page = []
  197. for mobj in re.finditer(r'href="/video([0-9_]+)"', page):
  198. if mobj.group(1) not in ids_in_page:
  199. ids_in_page.append(mobj.group(1))
  200. return ids_in_page
  201. def _real_extract(self, url):
  202. # Extract page id
  203. mobj = re.match(self._VALID_URL, url)
  204. if mobj is None:
  205. raise ExtractorError('Invalid URL: %s' % url)
  206. # Download page and get video ids
  207. page_id = mobj.group(1)
  208. page = self._download_webpage(url, page_id)
  209. video_ids = self.extract_videos_from_page(page)
  210. self._downloader.to_screen('[vk] User videos %s: Found %i videos' % (page_id, len(video_ids)))
  211. url_entries = [self.url_result('http://vk.com/video' + video_id, 'VK', video_id=video_id)
  212. for video_id in video_ids]
  213. return self.playlist_result(url_entries, page_id)