ustream.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. from __future__ import unicode_literals
  2. import json
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_urlparse,
  7. )
  8. from ..utils import (
  9. ExtractorError,
  10. int_or_none,
  11. float_or_none,
  12. )
  13. class UstreamIE(InfoExtractor):
  14. _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
  15. IE_NAME = 'ustream'
  16. _TESTS = [{
  17. 'url': 'http://www.ustream.tv/recorded/20274954',
  18. 'md5': '088f151799e8f572f84eb62f17d73e5c',
  19. 'info_dict': {
  20. 'id': '20274954',
  21. 'ext': 'flv',
  22. 'uploader': 'Young Americans for Liberty',
  23. 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
  24. },
  25. }, {
  26. # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
  27. # Title and uploader available only from params JSON
  28. 'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct',
  29. 'md5': '5a2abf40babeac9812ed20ae12d34e10',
  30. 'info_dict': {
  31. 'id': '59307601',
  32. 'ext': 'flv',
  33. 'title': '-CG11- Canada Games Figure Skating',
  34. 'uploader': 'sportscanadatv',
  35. }
  36. }]
  37. def _real_extract(self, url):
  38. m = re.match(self._VALID_URL, url)
  39. video_id = m.group('id')
  40. # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
  41. if m.group('type') == 'embed/recorded':
  42. video_id = m.group('id')
  43. desktop_url = 'http://www.ustream.tv/recorded/' + video_id
  44. return self.url_result(desktop_url, 'Ustream')
  45. if m.group('type') == 'embed':
  46. video_id = m.group('id')
  47. webpage = self._download_webpage(url, video_id)
  48. desktop_video_id = self._html_search_regex(
  49. r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
  50. desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
  51. return self.url_result(desktop_url, 'Ustream')
  52. params = self._download_json(
  53. 'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
  54. error = params.get('error')
  55. if error:
  56. raise ExtractorError(
  57. '%s returned error: %s' % (self.IE_NAME, error), expected=True)
  58. video = params['video']
  59. formats = [{
  60. 'id': format_id,
  61. 'url': video_url,
  62. 'ext': format_id,
  63. } for format_id, video_url in video['media_urls'].items()]
  64. self._sort_formats(formats)
  65. title = video['title']
  66. description = video.get('description')
  67. timestamp = int_or_none(video.get('created_at'))
  68. duration = float_or_none(video.get('length'))
  69. filesize = float_or_none(video.get('file_size'))
  70. view_count = int_or_none(video.get('views'))
  71. uploader = video.get('owner', {}).get('username')
  72. uploader_id = video.get('owner', {}).get('id')
  73. thumbnails = [{
  74. 'id': thumbnail_id,
  75. 'url': thumbnail_url,
  76. } for thumbnail_id, thumbnail_url in video.get('thumbnail', {}).items()]
  77. return {
  78. 'id': video_id,
  79. 'title': title,
  80. 'description': description,
  81. 'thumbnails': thumbnails,
  82. 'timestamp': timestamp,
  83. 'duration': duration,
  84. 'filesize': filesize,
  85. 'view_count': view_count,
  86. 'uploader': uploader,
  87. 'uploader_id': uploader_id,
  88. 'formats': formats,
  89. }
  90. class UstreamChannelIE(InfoExtractor):
  91. _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
  92. IE_NAME = 'ustream:channel'
  93. _TEST = {
  94. 'url': 'http://www.ustream.tv/channel/channeljapan',
  95. 'info_dict': {
  96. 'id': '10874166',
  97. },
  98. 'playlist_mincount': 17,
  99. }
  100. def _real_extract(self, url):
  101. m = re.match(self._VALID_URL, url)
  102. display_id = m.group('slug')
  103. webpage = self._download_webpage(url, display_id)
  104. channel_id = self._html_search_meta('ustream:channel_id', webpage)
  105. BASE = 'http://www.ustream.tv'
  106. next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
  107. video_ids = []
  108. while next_url:
  109. reply = self._download_json(
  110. compat_urlparse.urljoin(BASE, next_url), display_id,
  111. note='Downloading video information (next: %d)' % (len(video_ids) + 1))
  112. video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
  113. next_url = reply['nextUrl']
  114. entries = [
  115. self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
  116. for vid in video_ids]
  117. return {
  118. '_type': 'playlist',
  119. 'id': channel_id,
  120. 'display_id': display_id,
  121. 'entries': entries,
  122. }