tubitv.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import codecs
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. sanitized_Request,
  10. urlencode_postdata,
  11. )
  12. class TubiTvIE(InfoExtractor):
  13. _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video\?id=(?P<id>[0-9]+)'
  14. _LOGIN_URL = 'http://tubitv.com/login'
  15. _NETRC_MACHINE = 'tubitv'
  16. _TEST = {
  17. 'url': 'http://tubitv.com/video?id=54411&title=The_Kitchen_Musical_-_EP01',
  18. 'info_dict': {
  19. 'id': '54411',
  20. 'ext': 'mp4',
  21. 'title': 'The Kitchen Musical - EP01',
  22. 'thumbnail': 're:^https?://.*\.png$',
  23. 'description': 'md5:37532716166069b353e8866e71fefae7',
  24. 'duration': 2407,
  25. },
  26. 'params': {
  27. 'skip_download': 'HLS download',
  28. },
  29. }
  30. def _login(self):
  31. (username, password) = self._get_login_info()
  32. if username is None:
  33. return
  34. self.report_login()
  35. form_data = {
  36. 'username': username,
  37. 'password': password,
  38. }
  39. payload = urlencode_postdata(form_data)
  40. request = sanitized_Request(self._LOGIN_URL, payload)
  41. request.add_header('Content-Type', 'application/x-www-form-urlencoded')
  42. login_page = self._download_webpage(
  43. request, None, False, 'Wrong login info')
  44. if not re.search(r'id="tubi-logout"', login_page):
  45. raise ExtractorError(
  46. 'Login failed (invalid username/password)', expected=True)
  47. def _real_initialize(self):
  48. self._login()
  49. def _real_extract(self, url):
  50. video_id = self._match_id(url)
  51. webpage = self._download_webpage(url, video_id)
  52. if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
  53. self.raise_login_required('This video requires login')
  54. title = self._og_search_title(webpage)
  55. description = self._og_search_description(webpage)
  56. thumbnail = self._og_search_thumbnail(webpage)
  57. duration = int_or_none(self._html_search_meta(
  58. 'video:duration', webpage, 'duration'))
  59. apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
  60. m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
  61. formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
  62. self._sort_formats(formats)
  63. return {
  64. 'id': video_id,
  65. 'title': title,
  66. 'formats': formats,
  67. 'thumbnail': thumbnail,
  68. 'description': description,
  69. 'duration': duration,
  70. }