ustream.py

from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..compat import (
    compat_urlparse,
)


class UstreamIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<videoID>\d+)'
    IE_NAME = 'ustream'
    _TEST = {
        'url': 'http://www.ustream.tv/recorded/20274954',
        'md5': '088f151799e8f572f84eb62f17d73e5c',
        'info_dict': {
            'id': '20274954',
            'ext': 'flv',
            'uploader': 'Young Americans for Liberty',
            'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
        if m.group('type') == 'embed/recorded':
            video_id = m.group('videoID')
            desktop_url = 'http://www.ustream.tv/recorded/' + video_id
            return self.url_result(desktop_url, 'Ustream')
        if m.group('type') == 'embed':
            video_id = m.group('videoID')
            webpage = self._download_webpage(url, video_id)
            desktop_video_id = self._html_search_regex(
                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
            return self.url_result(desktop_url, 'Ustream')

        params = self._download_json(
            'http://cdngw.ustream.tv/rgwjson/Viewer.getVideo/' + json.dumps({
                'brandId': 1,
                'videoId': int(video_id),
                'autoplay': False,
            }), video_id)
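        # The Viewer.getVideo reply is assumed to carry a direct FLV URL under
        # its 'flv' key; nothing else from the reply is used below.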
        video_url = params['flv']

        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
                                              webpage, 'title')

        uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
                                           webpage, 'uploader', fatal=False, flags=re.DOTALL)

        thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
                                            webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'uploader': uploader,
            'thumbnail': thumbnail,
        }


class UstreamChannelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
    IE_NAME = 'ustream:channel'
    _TEST = {
        'url': 'http://www.ustream.tv/channel/channeljapan',
        'info_dict': {
            'id': '10874166',
        },
        'playlist_mincount': 17,
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        display_id = m.group('slug')

        webpage = self._download_webpage(url, display_id)
        channel_id = self._html_search_meta('ustream:channel_id', webpage)

        BASE = 'http://www.ustream.tv'
        next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
        video_ids = []
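        # The socialstream listing is assumed to be paged: each JSON reply
        # carries a chunk of HTML in 'data' plus a 'nextUrl' that is followed
        # until it comes back empty.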
        while next_url:
            reply = self._download_json(
                compat_urlparse.urljoin(BASE, next_url), display_id,
                note='Downloading video information (next: %d)' % (len(video_ids) + 1))
            video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
            next_url = reply['nextUrl']

        entries = [
            self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
            for vid in video_ids]
        return {
            '_type': 'playlist',
            'id': channel_id,
            'display_id': display_id,
            'entries': entries,
        }
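

# A minimal usage sketch (not part of the upstream module): these extractors
# are normally selected and driven by youtube-dl's YoutubeDL front end rather
# than instantiated directly, roughly along the lines of:
#
#     import youtube_dl
#     ydl = youtube_dl.YoutubeDL({'quiet': True, 'skip_download': True})
#     info = ydl.extract_info('http://www.ustream.tv/recorded/20274954', download=False)
#     print(info.get('title'))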