ustream.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..compat import (
  5. compat_urlparse,
  6. )
  7. from ..utils import (
  8. ExtractorError,
  9. int_or_none,
  10. float_or_none,
  11. )
  12. class UstreamIE(InfoExtractor):
  13. _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
  14. IE_NAME = 'ustream'
  15. _TESTS = [{
  16. 'url': 'http://www.ustream.tv/recorded/20274954',
  17. 'md5': '088f151799e8f572f84eb62f17d73e5c',
  18. 'info_dict': {
  19. 'id': '20274954',
  20. 'ext': 'flv',
  21. 'uploader': 'Young Americans for Liberty',
  22. 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
  23. },
  24. }, {
  25. # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
  26. # Title and uploader available only from params JSON
  27. 'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct',
  28. 'md5': '5a2abf40babeac9812ed20ae12d34e10',
  29. 'info_dict': {
  30. 'id': '59307601',
  31. 'ext': 'flv',
  32. 'title': '-CG11- Canada Games Figure Skating',
  33. 'uploader': 'sportscanadatv',
  34. }
  35. }]
  36. def _real_extract(self, url):
  37. m = re.match(self._VALID_URL, url)
  38. video_id = m.group('id')
  39. # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
  40. if m.group('type') == 'embed/recorded':
  41. video_id = m.group('id')
  42. desktop_url = 'http://www.ustream.tv/recorded/' + video_id
  43. return self.url_result(desktop_url, 'Ustream')
  44. if m.group('type') == 'embed':
  45. video_id = m.group('id')
  46. webpage = self._download_webpage(url, video_id)
  47. desktop_video_id = self._html_search_regex(
  48. r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
  49. desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
  50. return self.url_result(desktop_url, 'Ustream')
  51. params = self._download_json(
  52. 'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
  53. error = params.get('error')
  54. if error:
  55. raise ExtractorError(
  56. '%s returned error: %s' % (self.IE_NAME, error), expected=True)
  57. video = params['video']
  58. title = video['title']
  59. filesize = float_or_none(video.get('file_size'))
  60. formats = [{
  61. 'id': format_id,
  62. 'url': video_url,
  63. 'ext': format_id,
  64. 'filesize': filesize,
  65. } for format_id, video_url in video['media_urls'].items()]
  66. self._sort_formats(formats)
  67. description = video.get('description')
  68. timestamp = int_or_none(video.get('created_at'))
  69. duration = float_or_none(video.get('length'))
  70. view_count = int_or_none(video.get('views'))
  71. uploader = video.get('owner', {}).get('username')
  72. uploader_id = video.get('owner', {}).get('id')
  73. thumbnails = [{
  74. 'id': thumbnail_id,
  75. 'url': thumbnail_url,
  76. } for thumbnail_id, thumbnail_url in video.get('thumbnail', {}).items()]
  77. return {
  78. 'id': video_id,
  79. 'title': title,
  80. 'description': description,
  81. 'thumbnails': thumbnails,
  82. 'timestamp': timestamp,
  83. 'duration': duration,
  84. 'view_count': view_count,
  85. 'uploader': uploader,
  86. 'uploader_id': uploader_id,
  87. 'formats': formats,
  88. }
  89. class UstreamChannelIE(InfoExtractor):
  90. _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
  91. IE_NAME = 'ustream:channel'
  92. _TEST = {
  93. 'url': 'http://www.ustream.tv/channel/channeljapan',
  94. 'info_dict': {
  95. 'id': '10874166',
  96. },
  97. 'playlist_mincount': 17,
  98. }
  99. def _real_extract(self, url):
  100. m = re.match(self._VALID_URL, url)
  101. display_id = m.group('slug')
  102. webpage = self._download_webpage(url, display_id)
  103. channel_id = self._html_search_meta('ustream:channel_id', webpage)
  104. BASE = 'http://www.ustream.tv'
  105. next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
  106. video_ids = []
  107. while next_url:
  108. reply = self._download_json(
  109. compat_urlparse.urljoin(BASE, next_url), display_id,
  110. note='Downloading video information (next: %d)' % (len(video_ids) + 1))
  111. video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
  112. next_url = reply['nextUrl']
  113. entries = [
  114. self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
  115. for vid in video_ids]
  116. return {
  117. '_type': 'playlist',
  118. 'id': channel_id,
  119. 'display_id': display_id,
  120. 'entries': entries,
  121. }