crackle.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. # coding: utf-8
  2. from __future__ import unicode_literals, division
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_HTTPError
  6. from ..utils import (
  7. determine_ext,
  8. float_or_none,
  9. int_or_none,
  10. parse_age_limit,
  11. parse_duration,
  12. url_or_none,
  13. ExtractorError
  14. )
  class CrackleIE(InfoExtractor):
      """Information extractor for crackle.com / sonycrackle.com videos.

      Accepts regular site URLs (including ``/playlist/<n>/`` ones) as well as
      the internal ``crackle:<id>`` form; the trailing numeric component of
      the URL is used as the media id.
      """

      _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
      _TESTS = [{
          # geo restricted to CA
          'url': 'https://www.crackle.com/andromeda/2502343',
          'info_dict': {
              'id': '2502343',
              'ext': 'mp4',
              'title': 'Under The Night',
              'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
              'duration': 2583,
              'view_count': int,
              'average_rating': 0,
              'age_limit': 14,
              'genre': 'Action, Sci-Fi',
              'creator': 'Allan Kroeker',
              'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
              'release_year': 2000,
              'series': 'Andromeda',
              'episode': 'Under The Night',
              'season_number': 1,
              'episode_number': 1,
          },
          'params': {
              # m3u8 download
              'skip_download': True,
          }
      }, {
          'url': 'https://www.sonycrackle.com/andromeda/2502343',
          'only_matching': True,
      }]

      # Maps the ``Type`` value of a ``MediaURLs`` entry to the frame size
      # reported for the corresponding direct (non-manifest) format.  Entries
      # whose ``Type`` is not listed here are skipped.
      _MEDIA_FILE_SLOTS = {
          '360p.mp4': {
              'width': 640,
              'height': 360,
          },
          '480p.mp4': {
              'width': 768,
              'height': 432,
          },
          '480p_1mbps.mp4': {
              'width': 852,
              'height': 480,
          },
      }

      def _real_extract(self, url):
          """Return the info dict for the media referenced by *url*.

          The media details JSON is requested once per candidate country: only
          the ``geo_bypass_country`` downloader parameter when it is set,
          otherwise a fixed list of country codes.  The first country whose
          response carries a non-empty ``MediaURLs`` list is used.

          Raises:
              ExtractorError: re-raised immediately for any download failure
                  other than HTTP 401; after all countries have been tried,
                  the last HTTP 401 error is re-raised, or a new error is
                  raised when no request failed but none offered media URLs.
              KeyError: if the selected response has no ``Title``.
          """
          video_id = self._match_id(url)

          country_code = self._downloader.params.get('geo_bypass_country', None)
          countries = [country_code] if country_code else (
              'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')

          last_e = None

          for country in countries:
              try:
                  media = self._download_json(
                      'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
                      % (video_id, country), video_id,
                      'Downloading media JSON as %s' % country,
                      'Unable to download media JSON', query={
                          'disableProtocols': 'true',
                          'format': 'json'
                      })
              except ExtractorError as e:
                  # 401 means geo restriction, trying next country
                  if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                      last_e = e
                      continue
                  raise

              media_urls = media.get('MediaURLs')
              if not media_urls or not isinstance(media_urls, list):
                  # Nothing playable offered for this country; try the next.
                  continue

              # Title is mandatory: fail before doing any manifest requests.
              title = media['Title']

              formats = self._build_formats(media_urls, video_id)
              self._sort_formats(formats)

              if media.get('MediaTypeDisplayValue') == 'Full Episode':
                  series = media.get('ShowName')
                  episode = title
                  season_number = int_or_none(media.get('Season'))
                  episode_number = int_or_none(media.get('Episode'))
              else:
                  series = episode = season_number = episode_number = None

              return {
                  'id': video_id,
                  'title': title,
                  'description': media.get('Description'),
                  'duration': int_or_none(media.get(
                      'DurationInSeconds')) or parse_duration(media.get('Duration')),
                  'view_count': int_or_none(media.get('CountViews')),
                  'average_rating': float_or_none(media.get('UserRating')),
                  'age_limit': parse_age_limit(media.get('Rating')),
                  'genre': media.get('Genre'),
                  'creator': media.get('Directors'),
                  'artist': media.get('Cast'),
                  'release_year': int_or_none(media.get('ReleaseYear')),
                  'series': series,
                  'episode': episode,
                  'season_number': season_number,
                  'episode_number': episode_number,
                  'thumbnails': self._build_thumbnails(media.get('Images')),
                  'subtitles': self._build_subtitles(
                      media.get('ClosedCaptionFiles')),
                  'formats': formats,
              }

          if last_e is None:
              # Every request succeeded but none carried usable media URLs;
              # ``raise None`` would be a TypeError, so report it explicitly.
              raise ExtractorError(
                  'Unable to find any media URLs for %s' % video_id)
          raise last_e

      def _build_formats(self, media_urls, video_id):
          """Build the (unsorted) formats list from ``MediaURLs`` entries.

          Entries that are not dicts, are flagged ``UseDRM`` or lack a valid
          ``Path`` URL are skipped.  HLS, DASH and ISM manifests are expanded
          non-fatally; any other URL is kept only when its ``Type`` is a key
          of ``_MEDIA_FILE_SLOTS``.
          """
          formats = []
          for media_url in media_urls:
              if not isinstance(media_url, dict):
                  continue
              if media_url.get('UseDRM') is True:
                  continue
              format_url = url_or_none(media_url.get('Path'))
              if not format_url:
                  continue
              ext = determine_ext(format_url)
              if ext == 'm3u8':
                  formats.extend(self._extract_m3u8_formats(
                      format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                      m3u8_id='hls', fatal=False))
              elif ext == 'mpd':
                  formats.extend(self._extract_mpd_formats(
                      format_url, video_id, mpd_id='dash', fatal=False))
              elif format_url.endswith('.ism/Manifest'):
                  formats.extend(self._extract_ism_formats(
                      format_url, video_id, ism_id='mss', fatal=False))
              else:
                  mfs_path = media_url.get('Type')
                  mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
                  if not mfs_info:
                      continue
                  formats.append({
                      'url': format_url,
                      'format_id': 'http-' + mfs_path.split('.')[0],
                      'width': mfs_info['width'],
                      'height': mfs_info['height'],
                  })
          return formats

      @staticmethod
      def _build_subtitles(cc_files):
          """Map ``ClosedCaptionFiles`` entries to a subtitles dict.

          Returns a dict keyed by the entry's ``Locale`` (``'en'`` when it is
          missing or empty) whose values are lists of ``{'url': ...}`` dicts.
          Anything that is not a list of dicts with a valid ``Path`` URL is
          ignored.
          """
          subtitles = {}
          if not isinstance(cc_files, list):
              return subtitles
          for cc_file in cc_files:
              if not isinstance(cc_file, dict):
                  continue
              cc_url = url_or_none(cc_file.get('Path'))
              if not cc_url:
                  continue
              lang = cc_file.get('Locale') or 'en'
              subtitles.setdefault(lang, []).append({'url': cc_url})
          return subtitles

      @staticmethod
      def _build_thumbnails(images):
          """Build the thumbnails list from the ``Images`` mapping.

          Only keys containing ``Img_<width>x<height>`` are used; width and
          height are taken from the key.  ``images`` must be a dict, since
          the entries are read as key/URL pairs; any other value yields an
          empty list.
          """
          thumbnails = []
          if not isinstance(images, dict):
              return thumbnails
          for image_key, image_url in images.items():
              mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
              if not mobj:
                  continue
              image_url = url_or_none(image_url)
              if not image_url:
                  continue
              thumbnails.append({
                  'url': image_url,
                  'width': int(mobj.group(1)),
                  'height': int(mobj.group(2)),
              })
          return thumbnails