crackle.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. # coding: utf-8
  2. from __future__ import unicode_literals, division
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. determine_ext,
  8. float_or_none,
  9. int_or_none,
  10. parse_age_limit,
  11. parse_duration,
  12. )
  13. class CrackleIE(InfoExtractor):
  14. _GEO_COUNTRIES = ['US']
  15. _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
  16. _TEST = {
  17. 'url': 'https://www.crackle.com/andromeda/2502343',
  18. 'info_dict': {
  19. 'id': '2502343',
  20. 'ext': 'mp4',
  21. 'title': 'Under The Night',
  22. 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
  23. 'duration': 2583,
  24. 'view_count': int,
  25. 'average_rating': 0,
  26. 'age_limit': 14,
  27. 'genre': 'Action, Sci-Fi',
  28. 'creator': 'Allan Kroeker',
  29. 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
  30. 'release_year': 2000,
  31. 'series': 'Andromeda',
  32. 'episode': 'Under The Night',
  33. 'season_number': 1,
  34. 'episode_number': 1,
  35. },
  36. 'params': {
  37. # m3u8 download
  38. 'skip_download': True,
  39. }
  40. }
  41. def _real_extract(self, url):
  42. video_id = self._match_id(url)
  43. media = self._download_json(
  44. 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
  45. % (video_id, self._GEO_COUNTRIES[0]), video_id, query={
  46. 'disableProtocols': 'true',
  47. 'format': 'json'
  48. })
  49. title = media['Title']
  50. formats = []
  51. for e in media['MediaURLs']:
  52. if e.get('UseDRM') is True:
  53. continue
  54. format_url = e.get('Path')
  55. if not format_url or not isinstance(format_url, compat_str):
  56. continue
  57. ext = determine_ext(format_url)
  58. if ext == 'm3u8':
  59. formats.extend(self._extract_m3u8_formats(
  60. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  61. m3u8_id='hls', fatal=False))
  62. elif ext == 'mpd':
  63. formats.extend(self._extract_mpd_formats(
  64. format_url, video_id, mpd_id='dash', fatal=False))
  65. self._sort_formats(formats)
  66. description = media.get('Description')
  67. duration = int_or_none(media.get(
  68. 'DurationInSeconds')) or parse_duration(media.get('Duration'))
  69. view_count = int_or_none(media.get('CountViews'))
  70. average_rating = float_or_none(media.get('UserRating'))
  71. age_limit = parse_age_limit(media.get('Rating'))
  72. genre = media.get('Genre')
  73. release_year = int_or_none(media.get('ReleaseYear'))
  74. creator = media.get('Directors')
  75. artist = media.get('Cast')
  76. if media.get('MediaTypeDisplayValue') == 'Full Episode':
  77. series = media.get('ShowName')
  78. episode = title
  79. season_number = int_or_none(media.get('Season'))
  80. episode_number = int_or_none(media.get('Episode'))
  81. else:
  82. series = episode = season_number = episode_number = None
  83. subtitles = {}
  84. cc_files = media.get('ClosedCaptionFiles')
  85. if isinstance(cc_files, list):
  86. for cc_file in cc_files:
  87. if not isinstance(cc_file, dict):
  88. continue
  89. cc_url = cc_file.get('Path')
  90. if not cc_url or not isinstance(cc_url, compat_str):
  91. continue
  92. lang = cc_file.get('Locale') or 'en'
  93. subtitles.setdefault(lang, []).append({'url': cc_url})
  94. thumbnails = []
  95. images = media.get('Images')
  96. if isinstance(images, list):
  97. for image_key, image_url in images.items():
  98. mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
  99. if not mobj:
  100. continue
  101. thumbnails.append({
  102. 'url': image_url,
  103. 'width': int(mobj.group(1)),
  104. 'height': int(mobj.group(2)),
  105. })
  106. return {
  107. 'id': video_id,
  108. 'title': title,
  109. 'description': description,
  110. 'duration': duration,
  111. 'view_count': view_count,
  112. 'average_rating': average_rating,
  113. 'age_limit': age_limit,
  114. 'genre': genre,
  115. 'creator': creator,
  116. 'artist': artist,
  117. 'release_year': release_year,
  118. 'series': series,
  119. 'episode': episode,
  120. 'season_number': season_number,
  121. 'episode_number': episode_number,
  122. 'thumbnails': thumbnails,
  123. 'subtitles': subtitles,
  124. 'formats': formats,
  125. }