cda.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. decode_packed_codes,
  7. ExtractorError,
  8. parse_duration
  9. )
  10. class CDAIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:(?:www|ebd)\.)?cda\.pl/(?:video|[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
  12. _TESTS = [
  13. {
  14. 'url': 'http://www.cda.pl/video/5749950c',
  15. 'md5': '6f844bf51b15f31fae165365707ae970',
  16. 'info_dict': {
  17. 'id': '5749950c',
  18. 'ext': 'mp4',
  19. 'height': 720,
  20. 'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
  21. 'duration': 39
  22. }
  23. },
  24. {
  25. 'url': 'http://www.cda.pl/video/57413289',
  26. 'md5': 'a88828770a8310fc00be6c95faf7f4d5',
  27. 'info_dict': {
  28. 'id': '57413289',
  29. 'ext': 'mp4',
  30. 'title': 'Lądowanie na lotnisku na Maderze',
  31. 'duration': 137
  32. }
  33. }
  34. ]
  35. def _real_extract(self, url):
  36. video_id = self._match_id(url)
  37. webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
  38. if 'Ten film jest dostępny dla użytkowników premium' in webpage:
  39. raise ExtractorError('This video is only available for premium users.', expected=True)
  40. title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title', fatal=False)
  41. def _get_format(page, version=''):
  42. unpacked = decode_packed_codes(page)
  43. duration = self._search_regex(r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False)
  44. format_id = None
  45. height = None
  46. m = re.search(r'<a data-quality="(?P<format_id>[^"]+)" href="[^"]+" class="quality-btn quality-btn-active">(?P<height>[0-9]+)p<\/a>', page)
  47. if m:
  48. format_id = m.group('format_id')
  49. height = int(m.group('height'))
  50. url = self._search_regex(r"url:\\'(.+?)\\'", unpacked, version + ' url', fatal=False)
  51. if url is None:
  52. return None
  53. return {
  54. 'format_id': format_id,
  55. 'height': height,
  56. 'url': url
  57. }, parse_duration(duration)
  58. formats = []
  59. format_desc, duration = _get_format(webpage) or (None, None)
  60. if format_desc is not None:
  61. formats.append(format_desc)
  62. pattern = re.compile(r'<a data-quality="[^"]+" href="([^"]+)" class="quality-btn">([0-9]+p)<\/a>')
  63. for version in re.findall(pattern, webpage):
  64. webpage = self._download_webpage(version[0], video_id, 'Downloading %s version information' % version[1], fatal=False)
  65. if not webpage:
  66. # Manually report warning because empty page is returned when invalid version is requested.
  67. self.report_warning('Unable to download %s version information' % version[1])
  68. continue
  69. format_desc, duration_ = _get_format(webpage, version[1]) or (None, None)
  70. duration = duration or duration_
  71. if format_desc is not None:
  72. formats.append(format_desc)
  73. self._sort_formats(formats)
  74. return {
  75. 'id': video_id,
  76. 'title': title,
  77. 'formats': formats,
  78. 'duration': duration
  79. }