streamcz.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import int_or_none
  7. class StreamCZIE(InfoExtractor):
  8. _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'
  9. _TESTS = [{
  10. 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
  11. 'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
  12. 'info_dict': {
  13. 'id': '765767',
  14. 'ext': 'mp4',
  15. 'title': 'Peklo na talíři: Éčka pro děti',
  16. 'description': 'md5:49ace0df986e95e331d0fe239d421519',
  17. 'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
  18. 'duration': 256,
  19. },
  20. }, {
  21. 'url': 'https://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
  22. 'md5': '246272e753e26bbace7fcd9deca0650c',
  23. 'info_dict': {
  24. 'id': '10002447',
  25. 'ext': 'mp4',
  26. 'title': 'Kancelář Blaník: Tři roky pro Mazánka',
  27. 'description': 'md5:9177695a8b756a0a8ab160de4043b392',
  28. 'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000',
  29. 'duration': 368,
  30. },
  31. }]
  32. def _real_extract(self, url):
  33. mobj = re.match(self._VALID_URL, url)
  34. video_id = mobj.group('videoid')
  35. webpage = self._download_webpage(url, video_id)
  36. data = self._html_search_regex(r'Stream\.Data\.Episode\((.+?)\);', webpage, 'stream data')
  37. jsonData = json.loads(data)
  38. formats = []
  39. for video in jsonData['instances']:
  40. for video_format in video['instances']:
  41. format_id = video_format['quality']
  42. if format_id == '240p':
  43. quality = 0
  44. elif format_id == '360p':
  45. quality = 1
  46. elif format_id == '480p':
  47. quality = 2
  48. elif format_id == '720p':
  49. quality = 3
  50. formats.append({
  51. 'format_id': '%s-%s' % (video_format['type'].split('/')[1], format_id),
  52. 'url': video_format['source'],
  53. 'quality': quality,
  54. })
  55. self._sort_formats(formats)
  56. return {
  57. 'id': str(jsonData['episode_id']),
  58. 'title': self._og_search_title(webpage),
  59. 'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),
  60. 'formats': formats,
  61. 'description': self._og_search_description(webpage),
  62. 'duration': int_or_none(jsonData['duration']),
  63. 'view_count': int_or_none(jsonData['stats_total']),
  64. }