vtm.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. from __future__ import unicode_literals
  2. import re
  3. from .generic import GenericIE
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. urlencode_postdata,
  7. compat_urllib_parse_urlencode,
  8. ExtractorError,
  9. remove_end,
  10. )
  11. class VTMIE(InfoExtractor):
  12. """Download full episodes that require an account from vtm.be or q2.be.
  13. The generic extractor can be used to download clips that do no require an
  14. account.
  15. """
  16. _VALID_URL = r'https?://(?:www\.)?(?P<site_id>vtm|q2)\.be/video[/?].+?'
  17. _NETRC_MACHINE = 'vtm'
  18. _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
  19. _TESTS = [
  20. {
  21. 'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
  22. 'info_dict': {
  23. 'id': 'vtm_20170219_VM0678361_vtmwatch',
  24. 'ext': 'mp4',
  25. 'title': 'Allemaal Chris afl. 6',
  26. 'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
  27. },
  28. 'skip_download': True,
  29. },
  30. {
  31. 'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
  32. 'only_matching': True,
  33. },
  34. {
  35. 'url': 'http://vtm.be/video?aid=163157',
  36. 'only_matching': True,
  37. },
  38. {
  39. 'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
  40. 'only_matching': True,
  41. },
  42. {
  43. 'url': 'http://vtm.be/video?aid=168332',
  44. 'info_dict': {
  45. 'id': 'video?aid=168332',
  46. 'ext': 'mp4',
  47. 'title': 'Videozone',
  48. },
  49. },
  50. ]
  51. def _real_initialize(self):
  52. self._logged_in = False
  53. def _login(self):
  54. (username, password) = self._get_login_info()
  55. if username is None or password is None:
  56. self.raise_login_required()
  57. auth_data = {
  58. 'APIKey': self._APIKEY,
  59. 'sdk': 'js_6.1',
  60. 'format': 'json',
  61. 'loginID': username,
  62. 'password': password,
  63. }
  64. auth_info = self._download_json(
  65. 'https://accounts.eu1.gigya.com/accounts.login', None,
  66. note='Logging in', errnote='Unable to log in',
  67. data=urlencode_postdata(auth_data), fatal=True)
  68. error_message = auth_info.get('errorDetails')
  69. if error_message:
  70. raise ExtractorError(
  71. 'Unable to login: %s' % error_message, expected=True)
  72. self._uid = auth_info['UID']
  73. self._uid_signature = auth_info['UIDSignature']
  74. self._signature_timestamp = auth_info['signatureTimestamp']
  75. self._logged_in = True
  76. def _real_extract(self, url):
  77. mobj = re.match(self._VALID_URL, url)
  78. site_id = mobj.group('site_id')
  79. webpage = self._download_webpage(url, None, "Downloading webpage")
  80. # The URL sometimes contains the video id, but not always, e.g., test
  81. # case 3. Fortunately, all webpages of videos requiring authentication
  82. # contain the video id.
  83. video_id = self._search_regex(
  84. r'\\"vodId\\":\\"(.+?)\\"', webpage, 'video_id', default=None)
  85. # It was most likely a video not requiring authentication.
  86. if not video_id:
  87. return self.url_result(url, 'Generic')
  88. if not self._logged_in:
  89. self._login()
  90. title = self._html_search_regex(
  91. r'\\"title\\":\\"(.+?)\\"', webpage, 'title', default=None)
  92. description = self._html_search_regex(
  93. r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
  94. webpage, 'description', default=None)
  95. data_url = 'http://vod.medialaan.io/api/1.0/item/%s/video' % video_id
  96. m3u8_data = {
  97. 'app_id': 'vtm_watch' if site_id == 'vtm' else 'q2',
  98. 'user_network': 'vtm-sso',
  99. 'UID': self._uid,
  100. 'UIDSignature': self._uid_signature,
  101. 'signatureTimestamp': self._signature_timestamp,
  102. }
  103. data = self._download_json(data_url, video_id, query=m3u8_data)
  104. formats = self._extract_m3u8_formats(
  105. data['response']['uri'], video_id, entry_protocol='m3u8_native',
  106. ext='mp4', m3u8_id='hls')
  107. self._sort_formats(formats)
  108. return {
  109. 'id': video_id,
  110. 'title': title,
  111. 'description': description,
  112. 'formats': formats,
  113. }