videoesri.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import os
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. unified_strdate
  8. )
  9. class VideoEsriIE(InfoExtractor):
  10. _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
  11. _TEST = {
  12. 'url': 'https://video.esri.com/watch/4228',
  13. 'md5': '170b4d513c2466ed483c150a48384133',
  14. 'info_dict': {
  15. 'id': '4228',
  16. 'ext': 'mp4',
  17. 'title': 'AppStudio for ArcGIS',
  18. 'thumbnail': 're:^https?://.*\.jpg$',
  19. 'upload_date': '20150310',
  20. }
  21. }
  22. def _real_extract(self, url):
  23. video_id = self._match_id(url)
  24. webpage = self._download_webpage(url, video_id)
  25. title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
  26. upload_date_raw = self._search_regex(
  27. r'http-equiv="last-modified" content="(.*)"',
  28. webpage, 'upload date')
  29. upload_date = unified_strdate(upload_date_raw)
  30. settings_info = self._search_regex(
  31. r'evPlayerSettings = {(.*?);\s*$',
  32. webpage, 'settings info', flags=re.MULTILINE | re.DOTALL)
  33. # thumbnail includes '_x' for large, also has {_m,_t,_s} or
  34. # without size suffix returns full image
  35. thumbnail_path = re.findall(
  36. r'image\': \'(\/thumbs.*)\'',
  37. settings_info)[0]
  38. if thumbnail_path:
  39. thumbnail = '/'.join(['http://video.esri.com', thumbnail_path])
  40. # note that this misses the (exceedly rare) webm files
  41. video_paths = re.findall(r'mp4:(.*)\'', settings_info)
  42. # find possible http servers of the mp4 files (also has rtsp)
  43. base_url = re.findall(
  44. r'netstreambasepath\':\s\'(h.*)\'', settings_info)[0]
  45. # these are the numbers used internally, but really map
  46. # to other resolutions, e.g. 960 is 720p.
  47. heights = [480, 720, 960]
  48. videos_by_res = {}
  49. for video_path in video_paths:
  50. url = "{base_url}{video_path}".format(
  51. base_url=base_url,
  52. video_path=video_path)
  53. filename, ext = os.path.splitext(video_path)
  54. height_label = int(filename.split('_')[1])
  55. videos_by_res[height_label] = {
  56. 'url': url,
  57. 'ext': ext[1:],
  58. 'protocol': 'http', # http-only supported currently
  59. }
  60. formats = []
  61. for height in heights:
  62. if height in videos_by_res:
  63. formats.append(videos_by_res[height])
  64. result = {
  65. 'id': video_id,
  66. 'title': title,
  67. 'upload_date': upload_date,
  68. 'formats': formats,
  69. }
  70. if thumbnail:
  71. result['thumbnail'] = thumbnail
  72. return result