vimeo.py 48 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import base64
  4. import functools
  5. import json
  6. import re
  7. import itertools
  8. from .common import InfoExtractor
  9. from ..compat import (
  10. compat_kwargs,
  11. compat_HTTPError,
  12. compat_str,
  13. compat_urlparse,
  14. )
  15. from ..utils import (
  16. clean_html,
  17. determine_ext,
  18. dict_get,
  19. ExtractorError,
  20. js_to_json,
  21. int_or_none,
  22. merge_dicts,
  23. OnDemandPagedList,
  24. parse_filesize,
  25. parse_iso8601,
  26. RegexNotFoundError,
  27. sanitized_Request,
  28. smuggle_url,
  29. std_headers,
  30. str_or_none,
  31. try_get,
  32. unified_timestamp,
  33. unsmuggle_url,
  34. urlencode_postdata,
  35. urljoin,
  36. unescapeHTML,
  37. )
  38. class VimeoBaseInfoExtractor(InfoExtractor):
  39. _NETRC_MACHINE = 'vimeo'
  40. _LOGIN_REQUIRED = False
  41. _LOGIN_URL = 'https://vimeo.com/log_in'
  42. def _login(self):
  43. username, password = self._get_login_info()
  44. if username is None:
  45. if self._LOGIN_REQUIRED:
  46. raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  47. return
  48. webpage = self._download_webpage(
  49. self._LOGIN_URL, None, 'Downloading login page')
  50. token, vuid = self._extract_xsrft_and_vuid(webpage)
  51. data = {
  52. 'action': 'login',
  53. 'email': username,
  54. 'password': password,
  55. 'service': 'vimeo',
  56. 'token': token,
  57. }
  58. self._set_vimeo_cookie('vuid', vuid)
  59. try:
  60. self._download_webpage(
  61. self._LOGIN_URL, None, 'Logging in',
  62. data=urlencode_postdata(data), headers={
  63. 'Content-Type': 'application/x-www-form-urlencoded',
  64. 'Referer': self._LOGIN_URL,
  65. })
  66. except ExtractorError as e:
  67. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418:
  68. raise ExtractorError(
  69. 'Unable to log in: bad username or password',
  70. expected=True)
  71. raise ExtractorError('Unable to log in')
  72. def _get_video_password(self):
  73. password = self._downloader.params.get('videopassword')
  74. if password is None:
  75. raise ExtractorError(
  76. 'This video is protected by a password, use the --video-password option',
  77. expected=True)
  78. return password
  79. def _verify_video_password(self, url, video_id, password, token, vuid):
  80. if url.startswith('http://'):
  81. # vimeo only supports https now, but the user can give an http url
  82. url = url.replace('http://', 'https://')
  83. self._set_vimeo_cookie('vuid', vuid)
  84. return self._download_webpage(
  85. url + '/password', video_id, 'Verifying the password',
  86. 'Wrong password', data=urlencode_postdata({
  87. 'password': password,
  88. 'token': token,
  89. }), headers={
  90. 'Content-Type': 'application/x-www-form-urlencoded',
  91. 'Referer': url,
  92. })
  93. def _extract_xsrft_and_vuid(self, webpage):
  94. xsrft = self._search_regex(
  95. r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
  96. webpage, 'login token', group='xsrft')
  97. vuid = self._search_regex(
  98. r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
  99. webpage, 'vuid', group='vuid')
  100. return xsrft, vuid
  101. def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
  102. vimeo_config = self._search_regex(
  103. r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
  104. webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
  105. if vimeo_config:
  106. return self._parse_json(vimeo_config, video_id)
  107. def _set_vimeo_cookie(self, name, value):
  108. self._set_cookie('vimeo.com', name, value)
  109. def _vimeo_sort_formats(self, formats):
  110. # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
  111. # at the same time without actual units specified. This lead to wrong sorting.
  112. self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
  113. def _parse_config(self, config, video_id):
  114. video_data = config['video']
  115. video_title = video_data['title']
  116. live_event = video_data.get('live_event') or {}
  117. is_live = live_event.get('status') == 'started'
  118. formats = []
  119. config_files = video_data.get('files') or config['request'].get('files', {})
  120. for f in config_files.get('progressive', []):
  121. video_url = f.get('url')
  122. if not video_url:
  123. continue
  124. formats.append({
  125. 'url': video_url,
  126. 'format_id': 'http-%s' % f.get('quality'),
  127. 'width': int_or_none(f.get('width')),
  128. 'height': int_or_none(f.get('height')),
  129. 'fps': int_or_none(f.get('fps')),
  130. 'tbr': int_or_none(f.get('bitrate')),
  131. })
  132. # TODO: fix handling of 308 status code returned for live archive manifest requests
  133. sep_pattern = r'/sep/video/'
  134. for files_type in ('hls', 'dash'):
  135. for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
  136. manifest_url = cdn_data.get('url')
  137. if not manifest_url:
  138. continue
  139. format_id = '%s-%s' % (files_type, cdn_name)
  140. sep_manifest_urls = []
  141. if re.search(sep_pattern, manifest_url):
  142. for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
  143. sep_manifest_urls.append((format_id + suffix, re.sub(
  144. sep_pattern, '/%s/' % repl, manifest_url)))
  145. else:
  146. sep_manifest_urls = [(format_id, manifest_url)]
  147. for f_id, m_url in sep_manifest_urls:
  148. if files_type == 'hls':
  149. formats.extend(self._extract_m3u8_formats(
  150. m_url, video_id, 'mp4',
  151. 'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id,
  152. note='Downloading %s m3u8 information' % cdn_name,
  153. fatal=False))
  154. elif files_type == 'dash':
  155. if 'json=1' in m_url:
  156. real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
  157. if real_m_url:
  158. m_url = real_m_url
  159. mpd_formats = self._extract_mpd_formats(
  160. m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
  161. 'Downloading %s MPD information' % cdn_name,
  162. fatal=False)
  163. formats.extend(mpd_formats)
  164. live_archive = live_event.get('archive') or {}
  165. live_archive_source_url = live_archive.get('source_url')
  166. if live_archive_source_url and live_archive.get('status') == 'done':
  167. formats.append({
  168. 'format_id': 'live-archive-source',
  169. 'url': live_archive_source_url,
  170. 'preference': 1,
  171. })
  172. for f in formats:
  173. if f.get('vcodec') == 'none':
  174. f['preference'] = -50
  175. elif f.get('acodec') == 'none':
  176. f['preference'] = -40
  177. subtitles = {}
  178. text_tracks = config['request'].get('text_tracks')
  179. if text_tracks:
  180. for tt in text_tracks:
  181. subtitles[tt['lang']] = [{
  182. 'ext': 'vtt',
  183. 'url': urljoin('https://vimeo.com', tt['url']),
  184. }]
  185. thumbnails = []
  186. if not is_live:
  187. for key, thumb in video_data.get('thumbs', {}).items():
  188. thumbnails.append({
  189. 'id': key,
  190. 'width': int_or_none(key),
  191. 'url': thumb,
  192. })
  193. thumbnail = video_data.get('thumbnail')
  194. if thumbnail:
  195. thumbnails.append({
  196. 'url': thumbnail,
  197. })
  198. owner = video_data.get('owner') or {}
  199. video_uploader_url = owner.get('url')
  200. return {
  201. 'id': str_or_none(video_data.get('id')) or video_id,
  202. 'title': self._live_title(video_title) if is_live else video_title,
  203. 'uploader': owner.get('name'),
  204. 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
  205. 'uploader_url': video_uploader_url,
  206. 'thumbnails': thumbnails,
  207. 'duration': int_or_none(video_data.get('duration')),
  208. 'formats': formats,
  209. 'subtitles': subtitles,
  210. 'is_live': is_live,
  211. }
  212. def _extract_original_format(self, url, video_id, unlisted_hash=None):
  213. query = {'action': 'load_download_config'}
  214. if unlisted_hash:
  215. query['unlisted_hash'] = unlisted_hash
  216. download_data = self._download_json(
  217. url, video_id, fatal=False, query=query,
  218. headers={'X-Requested-With': 'XMLHttpRequest'})
  219. if download_data:
  220. source_file = download_data.get('source_file')
  221. if isinstance(source_file, dict):
  222. download_url = source_file.get('download_url')
  223. if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
  224. source_name = source_file.get('public_name', 'Original')
  225. if self._is_valid_url(download_url, video_id, '%s video' % source_name):
  226. ext = (try_get(
  227. source_file, lambda x: x['extension'],
  228. compat_str) or determine_ext(
  229. download_url, None) or 'mp4').lower()
  230. return {
  231. 'url': download_url,
  232. 'ext': ext,
  233. 'width': int_or_none(source_file.get('width')),
  234. 'height': int_or_none(source_file.get('height')),
  235. 'filesize': parse_filesize(source_file.get('size')),
  236. 'format_id': source_name,
  237. 'preference': 1,
  238. }
  239. class VimeoIE(VimeoBaseInfoExtractor):
  240. """Information extractor for vimeo.com."""
  241. # _VALID_URL matches Vimeo URLs
  242. _VALID_URL = r'''(?x)
  243. https?://
  244. (?:
  245. (?:
  246. www|
  247. player
  248. )
  249. \.
  250. )?
  251. vimeo(?:pro)?\.com/
  252. (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
  253. (?:.*?/)?
  254. (?:
  255. (?:
  256. play_redirect_hls|
  257. moogaloop\.swf)\?clip_id=
  258. )?
  259. (?:videos?/)?
  260. (?P<id>[0-9]+)
  261. (?:/(?P<unlisted_hash>[\da-f]{10}))?
  262. /?(?:[?&].*)?(?:[#].*)?$
  263. '''
  264. IE_NAME = 'vimeo'
  265. _TESTS = [
  266. {
  267. 'url': 'http://vimeo.com/56015672#at=0',
  268. 'md5': '8879b6cc097e987f02484baf890129e5',
  269. 'info_dict': {
  270. 'id': '56015672',
  271. 'ext': 'mp4',
  272. 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
  273. 'description': 'md5:2d3305bad981a06ff79f027f19865021',
  274. 'timestamp': 1355990239,
  275. 'upload_date': '20121220',
  276. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
  277. 'uploader_id': 'user7108434',
  278. 'uploader': 'Filippo Valsorda',
  279. 'duration': 10,
  280. 'license': 'by-sa',
  281. },
  282. 'params': {
  283. 'format': 'best[protocol=https]',
  284. },
  285. },
  286. {
  287. 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
  288. 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
  289. 'note': 'Vimeo Pro video (#1197)',
  290. 'info_dict': {
  291. 'id': '68093876',
  292. 'ext': 'mp4',
  293. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
  294. 'uploader_id': 'openstreetmapus',
  295. 'uploader': 'OpenStreetMap US',
  296. 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
  297. 'description': 'md5:2c362968038d4499f4d79f88458590c1',
  298. 'duration': 1595,
  299. 'upload_date': '20130610',
  300. 'timestamp': 1370893156,
  301. },
  302. 'params': {
  303. 'format': 'best[protocol=https]',
  304. },
  305. },
  306. {
  307. 'url': 'http://player.vimeo.com/video/54469442',
  308. 'md5': '619b811a4417aa4abe78dc653becf511',
  309. 'note': 'Videos that embed the url in the player page',
  310. 'info_dict': {
  311. 'id': '54469442',
  312. 'ext': 'mp4',
  313. 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
  314. 'uploader': 'Business of Software',
  315. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware',
  316. 'uploader_id': 'businessofsoftware',
  317. 'duration': 3610,
  318. 'description': None,
  319. },
  320. 'params': {
  321. 'format': 'best[protocol=https]',
  322. },
  323. 'expected_warnings': ['Unable to download JSON metadata'],
  324. },
  325. {
  326. 'url': 'http://vimeo.com/68375962',
  327. 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
  328. 'note': 'Video protected with password',
  329. 'info_dict': {
  330. 'id': '68375962',
  331. 'ext': 'mp4',
  332. 'title': 'youtube-dl password protected test video',
  333. 'timestamp': 1371200155,
  334. 'upload_date': '20130614',
  335. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
  336. 'uploader_id': 'user18948128',
  337. 'uploader': 'Jaime Marquínez Ferrándiz',
  338. 'duration': 10,
  339. 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
  340. },
  341. 'params': {
  342. 'format': 'best[protocol=https]',
  343. 'videopassword': 'youtube-dl',
  344. },
  345. },
  346. {
  347. 'url': 'http://vimeo.com/channels/keypeele/75629013',
  348. 'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
  349. 'info_dict': {
  350. 'id': '75629013',
  351. 'ext': 'mp4',
  352. 'title': 'Key & Peele: Terrorist Interrogation',
  353. 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
  354. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
  355. 'uploader_id': 'atencio',
  356. 'uploader': 'Peter Atencio',
  357. 'channel_id': 'keypeele',
  358. 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
  359. 'timestamp': 1380339469,
  360. 'upload_date': '20130928',
  361. 'duration': 187,
  362. },
  363. 'expected_warnings': ['Unable to download JSON metadata'],
  364. },
  365. {
  366. 'url': 'http://vimeo.com/76979871',
  367. 'note': 'Video with subtitles',
  368. 'info_dict': {
  369. 'id': '76979871',
  370. 'ext': 'mp4',
  371. 'title': 'The New Vimeo Player (You Know, For Videos)',
  372. 'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
  373. 'timestamp': 1381846109,
  374. 'upload_date': '20131015',
  375. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
  376. 'uploader_id': 'staff',
  377. 'uploader': 'Vimeo Staff',
  378. 'duration': 62,
  379. }
  380. },
  381. {
  382. # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
  383. 'url': 'https://player.vimeo.com/video/98044508',
  384. 'note': 'The js code contains assignments to the same variable as the config',
  385. 'info_dict': {
  386. 'id': '98044508',
  387. 'ext': 'mp4',
  388. 'title': 'Pier Solar OUYA Official Trailer',
  389. 'uploader': 'Tulio Gonçalves',
  390. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
  391. 'uploader_id': 'user28849593',
  392. },
  393. },
  394. {
  395. # contains original format
  396. 'url': 'https://vimeo.com/33951933',
  397. 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
  398. 'info_dict': {
  399. 'id': '33951933',
  400. 'ext': 'mp4',
  401. 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
  402. 'uploader': 'The DMCI',
  403. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
  404. 'uploader_id': 'dmci',
  405. 'timestamp': 1324343742,
  406. 'upload_date': '20111220',
  407. 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
  408. },
  409. },
  410. {
  411. # only available via https://vimeo.com/channels/tributes/6213729 and
  412. # not via https://vimeo.com/6213729
  413. 'url': 'https://vimeo.com/channels/tributes/6213729',
  414. 'info_dict': {
  415. 'id': '6213729',
  416. 'ext': 'mp4',
  417. 'title': 'Vimeo Tribute: The Shining',
  418. 'uploader': 'Casey Donahue',
  419. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
  420. 'uploader_id': 'caseydonahue',
  421. 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
  422. 'channel_id': 'tributes',
  423. 'timestamp': 1250886430,
  424. 'upload_date': '20090821',
  425. 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
  426. },
  427. 'params': {
  428. 'skip_download': True,
  429. },
  430. 'expected_warnings': ['Unable to download JSON metadata'],
  431. },
  432. {
  433. # redirects to ondemand extractor and should be passed through it
  434. # for successful extraction
  435. 'url': 'https://vimeo.com/73445910',
  436. 'info_dict': {
  437. 'id': '73445910',
  438. 'ext': 'mp4',
  439. 'title': 'The Reluctant Revolutionary',
  440. 'uploader': '10Ft Films',
  441. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
  442. 'uploader_id': 'tenfootfilms',
  443. 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384',
  444. 'upload_date': '20130830',
  445. 'timestamp': 1377853339,
  446. },
  447. 'params': {
  448. 'skip_download': True,
  449. },
  450. 'expected_warnings': ['Unable to download JSON metadata'],
  451. 'skip': 'this page is no longer available.',
  452. },
  453. {
  454. 'url': 'http://player.vimeo.com/video/68375962',
  455. 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
  456. 'info_dict': {
  457. 'id': '68375962',
  458. 'ext': 'mp4',
  459. 'title': 'youtube-dl password protected test video',
  460. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
  461. 'uploader_id': 'user18948128',
  462. 'uploader': 'Jaime Marquínez Ferrándiz',
  463. 'duration': 10,
  464. },
  465. 'params': {
  466. 'format': 'best[protocol=https]',
  467. 'videopassword': 'youtube-dl',
  468. },
  469. },
  470. {
  471. 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
  472. 'only_matching': True,
  473. },
  474. {
  475. 'url': 'https://vimeo.com/109815029',
  476. 'note': 'Video not completely processed, "failed" seed status',
  477. 'only_matching': True,
  478. },
  479. {
  480. 'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
  481. 'only_matching': True,
  482. },
  483. {
  484. 'url': 'https://vimeo.com/album/2632481/video/79010983',
  485. 'only_matching': True,
  486. },
  487. {
  488. # source file returns 403: Forbidden
  489. 'url': 'https://vimeo.com/7809605',
  490. 'only_matching': True,
  491. },
  492. {
  493. 'url': 'https://vimeo.com/160743502/abd0e13fb4',
  494. 'only_matching': True,
  495. },
  496. {
  497. # requires passing unlisted_hash(a52724358e) to load_download_config request
  498. 'url': 'https://vimeo.com/392479337/a52724358e',
  499. 'only_matching': True,
  500. }
  501. # https://gettingthingsdone.com/workflowmap/
  502. # vimeo embed with check-password page protected by Referer header
  503. ]
  504. @staticmethod
  505. def _smuggle_referrer(url, referrer_url):
  506. return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
  507. @staticmethod
  508. def _extract_urls(url, webpage):
  509. urls = []
  510. # Look for embedded (iframe) Vimeo player
  511. for mobj in re.finditer(
  512. r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
  513. webpage):
  514. urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
  515. PLAIN_EMBED_RE = (
  516. # Look for embedded (swf embed) Vimeo player
  517. r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
  518. # Look more for non-standard embedded Vimeo player
  519. r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
  520. )
  521. for embed_re in PLAIN_EMBED_RE:
  522. for mobj in re.finditer(embed_re, webpage):
  523. urls.append(mobj.group('url'))
  524. return urls
  525. @staticmethod
  526. def _extract_url(url, webpage):
  527. urls = VimeoIE._extract_urls(url, webpage)
  528. return urls[0] if urls else None
  529. def _verify_player_video_password(self, url, video_id, headers):
  530. password = self._get_video_password()
  531. data = urlencode_postdata({
  532. 'password': base64.b64encode(password.encode()),
  533. })
  534. headers = merge_dicts(headers, {
  535. 'Content-Type': 'application/x-www-form-urlencoded',
  536. })
  537. checked = self._download_json(
  538. url + '/check-password', video_id,
  539. 'Verifying the password', data=data, headers=headers)
  540. if checked is False:
  541. raise ExtractorError('Wrong video password', expected=True)
  542. return checked
  543. def _real_initialize(self):
  544. self._login()
  545. def _real_extract(self, url):
  546. url, data = unsmuggle_url(url, {})
  547. headers = std_headers.copy()
  548. if 'http_headers' in data:
  549. headers.update(data['http_headers'])
  550. if 'Referer' not in headers:
  551. headers['Referer'] = url
  552. # Extract ID from URL
  553. video_id, unlisted_hash = re.match(self._VALID_URL, url).groups()
  554. if unlisted_hash:
  555. token = self._download_json(
  556. 'https://vimeo.com/_rv/jwt', video_id, headers={
  557. 'X-Requested-With': 'XMLHttpRequest'
  558. })['token']
  559. video = self._download_json(
  560. 'https://api.vimeo.com/videos/%s:%s' % (video_id, unlisted_hash),
  561. video_id, headers={
  562. 'Authorization': 'jwt ' + token,
  563. }, query={
  564. 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
  565. })
  566. info = self._parse_config(self._download_json(
  567. video['config_url'], video_id), video_id)
  568. self._vimeo_sort_formats(info['formats'])
  569. get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
  570. info.update({
  571. 'description': video.get('description'),
  572. 'license': video.get('license'),
  573. 'release_timestamp': get_timestamp('release'),
  574. 'timestamp': get_timestamp('created'),
  575. 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
  576. })
  577. connections = try_get(
  578. video, lambda x: x['metadata']['connections'], dict) or {}
  579. for k in ('comment', 'like'):
  580. info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
  581. return info
  582. orig_url = url
  583. is_pro = 'vimeopro.com/' in url
  584. is_player = '://player.vimeo.com/video/' in url
  585. if is_pro:
  586. # some videos require portfolio_id to be present in player url
  587. # https://github.com/ytdl-org/youtube-dl/issues/20070
  588. url = self._extract_url(url, self._download_webpage(url, video_id))
  589. if not url:
  590. url = 'https://vimeo.com/' + video_id
  591. elif is_player:
  592. url = 'https://player.vimeo.com/video/' + video_id
  593. elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
  594. url = 'https://vimeo.com/' + video_id
  595. try:
  596. # Retrieve video webpage to extract further information
  597. webpage, urlh = self._download_webpage_handle(
  598. url, video_id, headers=headers)
  599. redirect_url = urlh.geturl()
  600. except ExtractorError as ee:
  601. if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
  602. errmsg = ee.cause.read()
  603. if b'Because of its privacy settings, this video cannot be played here' in errmsg:
  604. raise ExtractorError(
  605. 'Cannot download embed-only video without embedding '
  606. 'URL. Please call youtube-dl with the URL of the page '
  607. 'that embeds this video.',
  608. expected=True)
  609. raise
  610. # Now we begin extracting as much information as we can from what we
  611. # retrieved. First we extract the information common to all extractors,
  612. # and latter we extract those that are Vimeo specific.
  613. self.report_extraction(video_id)
  614. vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
  615. if vimeo_config:
  616. seed_status = vimeo_config.get('seed_status', {})
  617. if seed_status.get('state') == 'failed':
  618. raise ExtractorError(
  619. '%s said: %s' % (self.IE_NAME, seed_status['title']),
  620. expected=True)
  621. cc_license = None
  622. timestamp = None
  623. video_description = None
  624. # Extract the config JSON
  625. try:
  626. try:
  627. config_url = self._html_search_regex(
  628. r' data-config-url="(.+?)"', webpage,
  629. 'config URL', default=None)
  630. if not config_url:
  631. # Sometimes new react-based page is served instead of old one that require
  632. # different config URL extraction approach (see
  633. # https://github.com/ytdl-org/youtube-dl/pull/7209)
  634. page_config = self._parse_json(self._search_regex(
  635. r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
  636. webpage, 'page config'), video_id)
  637. config_url = page_config['player']['config_url']
  638. cc_license = page_config.get('cc_license')
  639. timestamp = try_get(
  640. page_config, lambda x: x['clip']['uploaded_on'],
  641. compat_str)
  642. video_description = clean_html(dict_get(
  643. page_config, ('description', 'description_html_escaped')))
  644. config = self._download_json(config_url, video_id)
  645. except RegexNotFoundError:
  646. # For pro videos or player.vimeo.com urls
  647. # We try to find out to which variable is assigned the config dic
  648. m_variable_name = re.search(r'(\w)\.video\.id', webpage)
  649. if m_variable_name is not None:
  650. config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
  651. else:
  652. config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
  653. config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
  654. config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
  655. config = self._search_regex(config_re, webpage, 'info section',
  656. flags=re.DOTALL)
  657. config = json.loads(config)
  658. except Exception as e:
  659. if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
  660. raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
  661. if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
  662. if '_video_password_verified' in data:
  663. raise ExtractorError('video password verification failed!')
  664. video_password = self._get_video_password()
  665. token, vuid = self._extract_xsrft_and_vuid(webpage)
  666. self._verify_video_password(
  667. redirect_url, video_id, video_password, token, vuid)
  668. return self._real_extract(
  669. smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
  670. else:
  671. raise ExtractorError('Unable to extract info section',
  672. cause=e)
  673. else:
  674. if config.get('view') == 4:
  675. config = self._verify_player_video_password(redirect_url, video_id, headers)
  676. video = config.get('video') or {}
  677. vod = video.get('vod') or {}
  678. def is_rented():
  679. if '>You rented this title.<' in webpage:
  680. return True
  681. if config.get('user', {}).get('purchased'):
  682. return True
  683. for purchase_option in vod.get('purchase_options', []):
  684. if purchase_option.get('purchased'):
  685. return True
  686. label = purchase_option.get('label_string')
  687. if label and (label.startswith('You rented this') or label.endswith(' remaining')):
  688. return True
  689. return False
  690. if is_rented() and vod.get('is_trailer'):
  691. feature_id = vod.get('feature_id')
  692. if feature_id and not data.get('force_feature_id', False):
  693. return self.url_result(smuggle_url(
  694. 'https://player.vimeo.com/player/%s' % feature_id,
  695. {'force_feature_id': True}), 'Vimeo')
  696. # Extract video description
  697. if not video_description:
  698. video_description = self._html_search_regex(
  699. r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
  700. webpage, 'description', default=None)
  701. if not video_description:
  702. video_description = self._html_search_meta(
  703. 'description', webpage, default=None)
  704. if not video_description and is_pro:
  705. orig_webpage = self._download_webpage(
  706. orig_url, video_id,
  707. note='Downloading webpage for description',
  708. fatal=False)
  709. if orig_webpage:
  710. video_description = self._html_search_meta(
  711. 'description', orig_webpage, default=None)
  712. if not video_description and not is_player:
  713. self._downloader.report_warning('Cannot find video description')
  714. # Extract upload date
  715. if not timestamp:
  716. timestamp = self._search_regex(
  717. r'<time[^>]+datetime="([^"]+)"', webpage,
  718. 'timestamp', default=None)
  719. try:
  720. view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
  721. like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
  722. comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
  723. except RegexNotFoundError:
  724. # This info is only available in vimeo.com/{id} urls
  725. view_count = None
  726. like_count = None
  727. comment_count = None
  728. formats = []
  729. source_format = self._extract_original_format(
  730. 'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
  731. if source_format:
  732. formats.append(source_format)
  733. info_dict_config = self._parse_config(config, video_id)
  734. formats.extend(info_dict_config['formats'])
  735. self._vimeo_sort_formats(formats)
  736. json_ld = self._search_json_ld(webpage, video_id, default={})
  737. if not cc_license:
  738. cc_license = self._search_regex(
  739. r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
  740. webpage, 'license', default=None, group='license')
  741. channel_id = self._search_regex(
  742. r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
  743. channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
  744. info_dict = {
  745. 'formats': formats,
  746. 'timestamp': unified_timestamp(timestamp),
  747. 'description': video_description,
  748. 'webpage_url': url,
  749. 'view_count': view_count,
  750. 'like_count': like_count,
  751. 'comment_count': comment_count,
  752. 'license': cc_license,
  753. 'channel_id': channel_id,
  754. 'channel_url': channel_url,
  755. }
  756. info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
  757. return info_dict
  758. class VimeoOndemandIE(VimeoIE):
  759. IE_NAME = 'vimeo:ondemand'
  760. _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
  761. _TESTS = [{
  762. # ondemand video not available via https://vimeo.com/id
  763. 'url': 'https://vimeo.com/ondemand/20704',
  764. 'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
  765. 'info_dict': {
  766. 'id': '105442900',
  767. 'ext': 'mp4',
  768. 'title': 'המעבדה - במאי יותם פלדמן',
  769. 'uploader': 'גם סרטים',
  770. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
  771. 'uploader_id': 'gumfilms',
  772. 'description': 'md5:4c027c965e439de4baab621e48b60791',
  773. 'upload_date': '20140906',
  774. 'timestamp': 1410032453,
  775. },
  776. 'params': {
  777. 'format': 'best[protocol=https]',
  778. },
  779. 'expected_warnings': ['Unable to download JSON metadata'],
  780. }, {
  781. # requires Referer to be passed along with og:video:url
  782. 'url': 'https://vimeo.com/ondemand/36938/126682985',
  783. 'info_dict': {
  784. 'id': '126584684',
  785. 'ext': 'mp4',
  786. 'title': 'Rävlock, rätt läte på rätt plats',
  787. 'uploader': 'Lindroth & Norin',
  788. 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin',
  789. 'uploader_id': 'lindrothnorin',
  790. 'description': 'md5:c3c46a90529612c8279fb6af803fc0df',
  791. 'upload_date': '20150502',
  792. 'timestamp': 1430586422,
  793. },
  794. 'params': {
  795. 'skip_download': True,
  796. },
  797. 'expected_warnings': ['Unable to download JSON metadata'],
  798. }, {
  799. 'url': 'https://vimeo.com/ondemand/nazmaalik',
  800. 'only_matching': True,
  801. }, {
  802. 'url': 'https://vimeo.com/ondemand/141692381',
  803. 'only_matching': True,
  804. }, {
  805. 'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
  806. 'only_matching': True,
  807. }]
  808. class VimeoChannelIE(VimeoBaseInfoExtractor):
  809. IE_NAME = 'vimeo:channel'
  810. _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
  811. _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
  812. _TITLE = None
  813. _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
  814. _TESTS = [{
  815. 'url': 'https://vimeo.com/channels/tributes',
  816. 'info_dict': {
  817. 'id': 'tributes',
  818. 'title': 'Vimeo Tributes',
  819. },
  820. 'playlist_mincount': 25,
  821. }]
  822. _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
  823. def _page_url(self, base_url, pagenum):
  824. return '%s/videos/page:%d/' % (base_url, pagenum)
  825. def _extract_list_title(self, webpage):
  826. return self._TITLE or self._html_search_regex(
  827. self._TITLE_RE, webpage, 'list title', fatal=False)
  828. def _title_and_entries(self, list_id, base_url):
  829. for pagenum in itertools.count(1):
  830. page_url = self._page_url(base_url, pagenum)
  831. webpage = self._download_webpage(
  832. page_url, list_id,
  833. 'Downloading page %s' % pagenum)
  834. if pagenum == 1:
  835. yield self._extract_list_title(webpage)
  836. # Try extracting href first since not all videos are available via
  837. # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
  838. clips = re.findall(
  839. r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
  840. if clips:
  841. for video_id, video_url, video_title in clips:
  842. yield self.url_result(
  843. compat_urlparse.urljoin(base_url, video_url),
  844. VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
  845. # More relaxed fallback
  846. else:
  847. for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
  848. yield self.url_result(
  849. 'https://vimeo.com/%s' % video_id,
  850. VimeoIE.ie_key(), video_id=video_id)
  851. if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
  852. break
  853. def _extract_videos(self, list_id, base_url):
  854. title_and_entries = self._title_and_entries(list_id, base_url)
  855. list_title = next(title_and_entries)
  856. return self.playlist_result(title_and_entries, list_id, list_title)
  857. def _real_extract(self, url):
  858. channel_id = self._match_id(url)
  859. return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
  860. class VimeoUserIE(VimeoChannelIE):
  861. IE_NAME = 'vimeo:user'
  862. _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos|[#?]|$)'
  863. _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
  864. _TESTS = [{
  865. 'url': 'https://vimeo.com/nkistudio/videos',
  866. 'info_dict': {
  867. 'title': 'Nki',
  868. 'id': 'nkistudio',
  869. },
  870. 'playlist_mincount': 66,
  871. }]
  872. _BASE_URL_TEMPL = 'https://vimeo.com/%s'
  873. class VimeoAlbumIE(VimeoBaseInfoExtractor):
  874. IE_NAME = 'vimeo:album'
  875. _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
  876. _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
  877. _TESTS = [{
  878. 'url': 'https://vimeo.com/album/2632481',
  879. 'info_dict': {
  880. 'id': '2632481',
  881. 'title': 'Staff Favorites: November 2013',
  882. },
  883. 'playlist_mincount': 13,
  884. }, {
  885. 'note': 'Password-protected album',
  886. 'url': 'https://vimeo.com/album/3253534',
  887. 'info_dict': {
  888. 'title': 'test',
  889. 'id': '3253534',
  890. },
  891. 'playlist_count': 1,
  892. 'params': {
  893. 'videopassword': 'youtube-dl',
  894. }
  895. }]
  896. _PAGE_SIZE = 100
  897. def _fetch_page(self, album_id, authorization, hashed_pass, page):
  898. api_page = page + 1
  899. query = {
  900. 'fields': 'link,uri',
  901. 'page': api_page,
  902. 'per_page': self._PAGE_SIZE,
  903. }
  904. if hashed_pass:
  905. query['_hashed_pass'] = hashed_pass
  906. try:
  907. videos = self._download_json(
  908. 'https://api.vimeo.com/albums/%s/videos' % album_id,
  909. album_id, 'Downloading page %d' % api_page, query=query, headers={
  910. 'Authorization': 'jwt ' + authorization,
  911. })['data']
  912. except ExtractorError as e:
  913. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  914. return
  915. for video in videos:
  916. link = video.get('link')
  917. if not link:
  918. continue
  919. uri = video.get('uri')
  920. video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
  921. yield self.url_result(link, VimeoIE.ie_key(), video_id)
  922. def _real_extract(self, url):
  923. album_id = self._match_id(url)
  924. viewer = self._download_json(
  925. 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
  926. if not viewer:
  927. webpage = self._download_webpage(url, album_id)
  928. viewer = self._parse_json(self._search_regex(
  929. r'bootstrap_data\s*=\s*({.+?})</script>',
  930. webpage, 'bootstrap data'), album_id)['viewer']
  931. jwt = viewer['jwt']
  932. album = self._download_json(
  933. 'https://api.vimeo.com/albums/' + album_id,
  934. album_id, headers={'Authorization': 'jwt ' + jwt},
  935. query={'fields': 'description,name,privacy'})
  936. hashed_pass = None
  937. if try_get(album, lambda x: x['privacy']['view']) == 'password':
  938. password = self._downloader.params.get('videopassword')
  939. if not password:
  940. raise ExtractorError(
  941. 'This album is protected by a password, use the --video-password option',
  942. expected=True)
  943. self._set_vimeo_cookie('vuid', viewer['vuid'])
  944. try:
  945. hashed_pass = self._download_json(
  946. 'https://vimeo.com/showcase/%s/auth' % album_id,
  947. album_id, 'Verifying the password', data=urlencode_postdata({
  948. 'password': password,
  949. 'token': viewer['xsrft'],
  950. }), headers={
  951. 'X-Requested-With': 'XMLHttpRequest',
  952. })['hashed_pass']
  953. except ExtractorError as e:
  954. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
  955. raise ExtractorError('Wrong password', expected=True)
  956. raise
  957. entries = OnDemandPagedList(functools.partial(
  958. self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE)
  959. return self.playlist_result(
  960. entries, album_id, album.get('name'), album.get('description'))
  961. class VimeoGroupsIE(VimeoChannelIE):
  962. IE_NAME = 'vimeo:group'
  963. _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
  964. _TESTS = [{
  965. 'url': 'https://vimeo.com/groups/kattykay',
  966. 'info_dict': {
  967. 'id': 'kattykay',
  968. 'title': 'Katty Kay',
  969. },
  970. 'playlist_mincount': 27,
  971. }]
  972. _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s'
  973. class VimeoReviewIE(VimeoBaseInfoExtractor):
  974. IE_NAME = 'vimeo:review'
  975. IE_DESC = 'Review pages on vimeo'
  976. _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
  977. _TESTS = [{
  978. 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
  979. 'md5': 'c507a72f780cacc12b2248bb4006d253',
  980. 'info_dict': {
  981. 'id': '75524534',
  982. 'ext': 'mp4',
  983. 'title': "DICK HARDWICK 'Comedian'",
  984. 'uploader': 'Richard Hardwick',
  985. 'uploader_id': 'user21297594',
  986. 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
  987. },
  988. 'expected_warnings': ['Unable to download JSON metadata'],
  989. }, {
  990. 'note': 'video player needs Referer',
  991. 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
  992. 'md5': '6295fdab8f4bf6a002d058b2c6dce276',
  993. 'info_dict': {
  994. 'id': '91613211',
  995. 'ext': 'mp4',
  996. 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
  997. 'uploader': 'DevWeek Events',
  998. 'duration': 2773,
  999. 'thumbnail': r're:^https?://.*\.jpg$',
  1000. 'uploader_id': 'user22258446',
  1001. },
  1002. 'skip': 'video gone',
  1003. }, {
  1004. 'note': 'Password protected',
  1005. 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
  1006. 'info_dict': {
  1007. 'id': '138823582',
  1008. 'ext': 'mp4',
  1009. 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
  1010. 'uploader': 'TMB',
  1011. 'uploader_id': 'user37284429',
  1012. },
  1013. 'params': {
  1014. 'videopassword': 'holygrail',
  1015. },
  1016. 'skip': 'video gone',
  1017. }]
  1018. def _real_initialize(self):
  1019. self._login()
  1020. def _real_extract(self, url):
  1021. page_url, video_id = re.match(self._VALID_URL, url).groups()
  1022. data = self._download_json(
  1023. page_url.replace('/review/', '/review/data/'), video_id)
  1024. if data.get('isLocked') is True:
  1025. video_password = self._get_video_password()
  1026. viewer = self._download_json(
  1027. 'https://vimeo.com/_rv/viewer', video_id)
  1028. webpage = self._verify_video_password(
  1029. 'https://vimeo.com/' + video_id, video_id,
  1030. video_password, viewer['xsrft'], viewer['vuid'])
  1031. clip_page_config = self._parse_json(self._search_regex(
  1032. r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
  1033. webpage, 'clip page config'), video_id)
  1034. config_url = clip_page_config['player']['config_url']
  1035. clip_data = clip_page_config.get('clip') or {}
  1036. else:
  1037. clip_data = data['clipData']
  1038. config_url = clip_data['configUrl']
  1039. config = self._download_json(config_url, video_id)
  1040. info_dict = self._parse_config(config, video_id)
  1041. source_format = self._extract_original_format(
  1042. page_url + '/action', video_id)
  1043. if source_format:
  1044. info_dict['formats'].append(source_format)
  1045. self._vimeo_sort_formats(info_dict['formats'])
  1046. info_dict['description'] = clean_html(clip_data.get('description'))
  1047. return info_dict
  1048. class VimeoWatchLaterIE(VimeoChannelIE):
  1049. IE_NAME = 'vimeo:watchlater'
  1050. IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
  1051. _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
  1052. _TITLE = 'Watch Later'
  1053. _LOGIN_REQUIRED = True
  1054. _TESTS = [{
  1055. 'url': 'https://vimeo.com/watchlater',
  1056. 'only_matching': True,
  1057. }]
  1058. def _real_initialize(self):
  1059. self._login()
  1060. def _page_url(self, base_url, pagenum):
  1061. url = '%s/page:%d/' % (base_url, pagenum)
  1062. request = sanitized_Request(url)
  1063. # Set the header to get a partial html page with the ids,
  1064. # the normal page doesn't contain them.
  1065. request.add_header('X-Requested-With', 'XMLHttpRequest')
  1066. return request
  1067. def _real_extract(self, url):
  1068. return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
  1069. class VimeoLikesIE(VimeoChannelIE):
  1070. _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
  1071. IE_NAME = 'vimeo:likes'
  1072. IE_DESC = 'Vimeo user likes'
  1073. _TESTS = [{
  1074. 'url': 'https://vimeo.com/user755559/likes/',
  1075. 'playlist_mincount': 293,
  1076. 'info_dict': {
  1077. 'id': 'user755559',
  1078. 'title': 'urza’s Likes',
  1079. },
  1080. }, {
  1081. 'url': 'https://vimeo.com/stormlapse/likes',
  1082. 'only_matching': True,
  1083. }]
  1084. def _page_url(self, base_url, pagenum):
  1085. return '%s/page:%d/' % (base_url, pagenum)
  1086. def _real_extract(self, url):
  1087. user_id = self._match_id(url)
  1088. return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
  1089. class VHXEmbedIE(VimeoBaseInfoExtractor):
  1090. IE_NAME = 'vhx:embed'
  1091. _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
  1092. @staticmethod
  1093. def _extract_url(webpage):
  1094. mobj = re.search(
  1095. r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
  1096. return unescapeHTML(mobj.group(1)) if mobj else None
  1097. def _real_extract(self, url):
  1098. video_id = self._match_id(url)
  1099. webpage = self._download_webpage(url, video_id)
  1100. config_url = self._parse_json(self._search_regex(
  1101. r'window\.OTTData\s*=\s*({.+})', webpage,
  1102. 'ott data'), video_id, js_to_json)['config_url']
  1103. config = self._download_json(config_url, video_id)
  1104. info = self._parse_config(config, video_id)
  1105. info['id'] = video_id
  1106. self._vimeo_sort_formats(info['formats'])
  1107. return info