aenetworks.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .theplatform import ThePlatformIE
  5. from ..utils import (
  6. ExtractorError,
  7. GeoRestrictedError,
  8. int_or_none,
  9. update_url_query,
  10. urlencode_postdata,
  11. )
  12. class AENetworksBaseIE(ThePlatformIE):
  13. _BASE_URL_REGEX = r'''(?x)https?://
  14. (?:(?:www|play|watch)\.)?
  15. (?P<domain>
  16. (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
  17. fyi\.tv
  18. )/'''
  19. _THEPLATFORM_KEY = 'crazyjava'
  20. _THEPLATFORM_SECRET = 's3cr3t'
  21. _DOMAIN_MAP = {
  22. 'history.com': ('HISTORY', 'history'),
  23. 'aetv.com': ('AETV', 'aetv'),
  24. 'mylifetime.com': ('LIFETIME', 'lifetime'),
  25. 'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
  26. 'fyi.tv': ('FYI', 'fyi'),
  27. 'historyvault.com': (None, 'historyvault'),
  28. 'biography.com': (None, 'biography'),
  29. }
  30. def _extract_aen_smil(self, smil_url, video_id, auth=None):
  31. query = {'mbr': 'true'}
  32. if auth:
  33. query['auth'] = auth
  34. TP_SMIL_QUERY = [{
  35. 'assetTypes': 'high_video_ak',
  36. 'switch': 'hls_high_ak'
  37. }, {
  38. 'assetTypes': 'high_video_s3'
  39. }, {
  40. 'assetTypes': 'high_video_s3',
  41. 'switch': 'hls_high_fastly',
  42. }]
  43. formats = []
  44. subtitles = {}
  45. last_e = None
  46. for q in TP_SMIL_QUERY:
  47. q.update(query)
  48. m_url = update_url_query(smil_url, q)
  49. m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
  50. try:
  51. tp_formats, tp_subtitles = self._extract_theplatform_smil(
  52. m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
  53. except ExtractorError as e:
  54. if isinstance(e, GeoRestrictedError):
  55. raise
  56. last_e = e
  57. continue
  58. formats.extend(tp_formats)
  59. subtitles = self._merge_subtitles(subtitles, tp_subtitles)
  60. if last_e and not formats:
  61. raise last_e
  62. self._sort_formats(formats)
  63. return {
  64. 'id': video_id,
  65. 'formats': formats,
  66. 'subtitles': subtitles,
  67. }
  68. def _extract_aetn_info(self, domain, filter_key, filter_value, url):
  69. requestor_id, brand = self._DOMAIN_MAP[domain]
  70. result = self._download_json(
  71. 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
  72. filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
  73. title = result['title']
  74. video_id = result['id']
  75. media_url = result['publicUrl']
  76. theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
  77. r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
  78. info = self._parse_theplatform_metadata(theplatform_metadata)
  79. auth = None
  80. if theplatform_metadata.get('AETN$isBehindWall'):
  81. resource = self._get_mvpd_resource(
  82. requestor_id, theplatform_metadata['title'],
  83. theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
  84. theplatform_metadata['ratings'][0]['rating'])
  85. auth = self._extract_mvpd_auth(
  86. url, video_id, requestor_id, resource)
  87. info.update(self._extract_aen_smil(media_url, video_id, auth))
  88. info.update({
  89. 'title': title,
  90. 'series': result.get('seriesName'),
  91. 'season_number': int_or_none(result.get('tvSeasonNumber')),
  92. 'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
  93. })
  94. return info
  95. class AENetworksIE(AENetworksBaseIE):
  96. IE_NAME = 'aenetworks'
  97. IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
  98. _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
  99. shows/[^/]+/season-\d+/episode-\d+|
  100. (?:
  101. (?:movie|special)s/[^/]+|
  102. (?:shows/[^/]+/)?videos
  103. )/[^/?#&]+
  104. )'''
  105. _TESTS = [{
  106. 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
  107. 'info_dict': {
  108. 'id': '22253814',
  109. 'ext': 'mp4',
  110. 'title': 'Winter is Coming',
  111. 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
  112. 'timestamp': 1338306241,
  113. 'upload_date': '20120529',
  114. 'uploader': 'AENE-NEW',
  115. },
  116. 'params': {
  117. # m3u8 download
  118. 'skip_download': True,
  119. },
  120. 'add_ie': ['ThePlatform'],
  121. 'skip': 'This video is only available for users of participating TV providers.',
  122. }, {
  123. 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
  124. 'info_dict': {
  125. 'id': '600587331957',
  126. 'ext': 'mp4',
  127. 'title': 'Inlawful Entry',
  128. 'description': 'md5:57c12115a2b384d883fe64ca50529e08',
  129. 'timestamp': 1452634428,
  130. 'upload_date': '20160112',
  131. 'uploader': 'AENE-NEW',
  132. },
  133. 'params': {
  134. # m3u8 download
  135. 'skip_download': True,
  136. },
  137. 'add_ie': ['ThePlatform'],
  138. }, {
  139. 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
  140. 'only_matching': True
  141. }, {
  142. 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
  143. 'only_matching': True
  144. }, {
  145. 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
  146. 'only_matching': True
  147. }, {
  148. 'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
  149. 'only_matching': True
  150. }, {
  151. 'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
  152. 'only_matching': True
  153. }, {
  154. 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
  155. 'only_matching': True
  156. }, {
  157. 'url': 'http://www.history.com/videos/history-of-valentines-day',
  158. 'only_matching': True
  159. }, {
  160. 'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
  161. 'only_matching': True
  162. }]
  163. def _real_extract(self, url):
  164. domain, canonical = re.match(self._VALID_URL, url).groups()
  165. return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
  166. class AENetworksListBaseIE(AENetworksBaseIE):
  167. def _call_api(self, resource, slug, brand, fields):
  168. return self._download_json(
  169. 'https://yoga.appsvcs.aetnd.com/graphql',
  170. slug, query={'brand': brand}, data=urlencode_postdata({
  171. 'query': '''{
  172. %s(slug: "%s") {
  173. %s
  174. }
  175. }''' % (resource, slug, fields),
  176. }))['data'][resource]
  177. def _real_extract(self, url):
  178. domain, slug = re.match(self._VALID_URL, url).groups()
  179. _, brand = self._DOMAIN_MAP[domain]
  180. playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
  181. base_url = 'http://watch.%s' % domain
  182. entries = []
  183. for item in (playlist.get(self._ITEMS_KEY) or []):
  184. doc = self._get_doc(item)
  185. canonical = doc.get('canonical')
  186. if not canonical:
  187. continue
  188. entries.append(self.url_result(
  189. base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
  190. description = None
  191. if self._PLAYLIST_DESCRIPTION_KEY:
  192. description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
  193. return self.playlist_result(
  194. entries, playlist.get('id'),
  195. playlist.get(self._PLAYLIST_TITLE_KEY), description)
  196. class AENetworksCollectionIE(AENetworksListBaseIE):
  197. IE_NAME = 'aenetworks:collection'
  198. _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
  199. _TESTS = [{
  200. 'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
  201. 'info_dict': {
  202. 'id': '282',
  203. 'title': 'America The Story of Us',
  204. },
  205. 'playlist_mincount': 12,
  206. }, {
  207. 'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
  208. 'only_matching': True
  209. }, {
  210. 'url': 'https://www.historyvault.com/collections/mysteryquest',
  211. 'only_matching': True
  212. }]
  213. _RESOURCE = 'list'
  214. _ITEMS_KEY = 'items'
  215. _PLAYLIST_TITLE_KEY = 'display_title'
  216. _PLAYLIST_DESCRIPTION_KEY = None
  217. _FIELDS = '''id
  218. display_title
  219. items {
  220. ... on ListVideoItem {
  221. doc {
  222. canonical
  223. id
  224. }
  225. }
  226. }'''
  227. def _get_doc(self, item):
  228. return item.get('doc') or {}
  229. class AENetworksShowIE(AENetworksListBaseIE):
  230. IE_NAME = 'aenetworks:show'
  231. _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
  232. _TESTS = [{
  233. 'url': 'http://www.history.com/shows/ancient-aliens',
  234. 'info_dict': {
  235. 'id': 'SERIES1574',
  236. 'title': 'Ancient Aliens',
  237. 'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
  238. },
  239. 'playlist_mincount': 150,
  240. }]
  241. _RESOURCE = 'series'
  242. _ITEMS_KEY = 'episodes'
  243. _PLAYLIST_TITLE_KEY = 'title'
  244. _PLAYLIST_DESCRIPTION_KEY = 'description'
  245. _FIELDS = '''description
  246. id
  247. title
  248. episodes {
  249. canonical
  250. id
  251. }'''
  252. def _get_doc(self, item):
  253. return item
  254. class HistoryTopicIE(AENetworksBaseIE):
  255. IE_NAME = 'history:topic'
  256. IE_DESC = 'History.com Topic'
  257. _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
  258. _TESTS = [{
  259. 'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
  260. 'info_dict': {
  261. 'id': '40700995724',
  262. 'ext': 'mp4',
  263. 'title': "History of Valentine’s Day",
  264. 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
  265. 'timestamp': 1375819729,
  266. 'upload_date': '20130806',
  267. 'uploader': 'AENE-NEW',
  268. },
  269. 'params': {
  270. # m3u8 download
  271. 'skip_download': True,
  272. },
  273. 'add_ie': ['ThePlatform'],
  274. }]
  275. def _real_extract(self, url):
  276. display_id = self._match_id(url)
  277. return self.url_result(
  278. 'http://www.history.com/videos/' + display_id,
  279. AENetworksIE.ie_key())
  280. class HistoryPlayerIE(AENetworksBaseIE):
  281. IE_NAME = 'history:player'
  282. _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
  283. _TESTS = []
  284. def _real_extract(self, url):
  285. domain, video_id = re.match(self._VALID_URL, url).groups()
  286. return self._extract_aetn_info(domain, 'id', video_id, url)
  287. class BiographyIE(AENetworksBaseIE):
  288. _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
  289. _TESTS = [{
  290. 'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
  291. 'info_dict': {
  292. 'id': '30322987',
  293. 'ext': 'mp4',
  294. 'title': 'Vincent Van Gogh - Full Episode',
  295. 'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
  296. 'timestamp': 1311970571,
  297. 'upload_date': '20110729',
  298. 'uploader': 'AENE-NEW',
  299. },
  300. 'params': {
  301. # m3u8 download
  302. 'skip_download': True,
  303. },
  304. 'add_ie': ['ThePlatform'],
  305. }]
  306. def _real_extract(self, url):
  307. display_id = self._match_id(url)
  308. webpage = self._download_webpage(url, display_id)
  309. player_url = self._search_regex(
  310. r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
  311. webpage, 'player URL')
  312. return self.url_result(player_url, HistoryPlayerIE.ie_key())