gaia.py 3.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. int_or_none,
  8. str_or_none,
  9. strip_or_none,
  10. try_get,
  11. )
  12. class GaiaIE(InfoExtractor):
  13. _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
  14. _TESTS = [{
  15. 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
  16. 'info_dict': {
  17. 'id': '89356',
  18. 'ext': 'mp4',
  19. 'title': 'Connecting with Universal Consciousness',
  20. 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
  21. 'upload_date': '20151116',
  22. 'timestamp': 1447707266,
  23. 'duration': 936,
  24. },
  25. 'params': {
  26. # m3u8 download
  27. 'skip_download': True,
  28. },
  29. }, {
  30. 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
  31. 'info_dict': {
  32. 'id': '89351',
  33. 'ext': 'mp4',
  34. 'title': 'Connecting with Universal Consciousness',
  35. 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
  36. 'upload_date': '20151116',
  37. 'timestamp': 1447707266,
  38. 'duration': 53,
  39. },
  40. 'params': {
  41. # m3u8 download
  42. 'skip_download': True,
  43. },
  44. }]
  45. def _real_extract(self, url):
  46. display_id, vtype = re.search(self._VALID_URL, url).groups()
  47. node_id = self._download_json(
  48. 'https://brooklyn.gaia.com/pathinfo', display_id, query={
  49. 'path': 'video/' + display_id,
  50. })['id']
  51. node = self._download_json(
  52. 'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
  53. vdata = node[vtype]
  54. media_id = compat_str(vdata['nid'])
  55. title = node['title']
  56. media = self._download_json(
  57. 'https://brooklyn.gaia.com/media/' + media_id, media_id)
  58. formats = self._extract_m3u8_formats(
  59. media['mediaUrls']['bcHLS'], media_id, 'mp4')
  60. self._sort_formats(formats)
  61. subtitles = {}
  62. text_tracks = media.get('textTracks', {})
  63. for key in ('captions', 'subtitles'):
  64. for lang, sub_url in text_tracks.get(key, {}).items():
  65. subtitles.setdefault(lang, []).append({
  66. 'url': sub_url,
  67. })
  68. fivestar = node.get('fivestar', {})
  69. fields = node.get('fields', {})
  70. def get_field_value(key, value_key='value'):
  71. return try_get(fields, lambda x: x[key][0][value_key])
  72. return {
  73. 'id': media_id,
  74. 'display_id': display_id,
  75. 'title': title,
  76. 'formats': formats,
  77. 'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
  78. 'timestamp': int_or_none(node.get('created')),
  79. 'subtitles': subtitles,
  80. 'duration': int_or_none(vdata.get('duration')),
  81. 'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
  82. 'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
  83. 'comment_count': int_or_none(node.get('comment_count')),
  84. 'series': try_get(node, lambda x: x['series']['title'], compat_str),
  85. 'season_number': int_or_none(get_field_value('season')),
  86. 'season_id': str_or_none(get_field_value('series_nid', 'nid')),
  87. 'episode_number': int_or_none(get_field_value('episode')),
  88. }