# mediaset.py (3.8 KB)
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. determine_ext,
  8. parse_duration,
  9. try_get,
  10. unified_strdate,
  11. )
  12. class MediasetIE(InfoExtractor):
  13. _VALID_URL = r'''(?x)
  14. (?:
  15. mediaset:|
  16. https?://
  17. (?:www\.)?video\.mediaset\.it/
  18. (?:
  19. (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
  20. player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
  21. )
  22. )(?P<id>[0-9]+)
  23. '''
  24. _TESTS = [{
  25. # full episode
  26. 'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
  27. 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
  28. 'info_dict': {
  29. 'id': '661824',
  30. 'ext': 'mp4',
  31. 'title': 'Quarta puntata',
  32. 'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
  33. 'thumbnail': r're:^https?://.*\.jpg$',
  34. 'duration': 1414,
  35. 'creator': 'mediaset',
  36. 'release_date': '20161107',
  37. 'series': 'Hello Goodbye',
  38. 'categories': ['reality'],
  39. },
  40. 'expected_warnings': ['is not a supported codec'],
  41. }, {
  42. # clip
  43. 'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
  44. 'only_matching': True,
  45. }, {
  46. # iframe simple
  47. 'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
  48. 'only_matching': True,
  49. }, {
  50. # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
  51. 'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
  52. 'only_matching': True,
  53. }, {
  54. 'url': 'mediaset:661824',
  55. 'only_matching': True,
  56. }]
  57. def _real_extract(self, url):
  58. video_id = self._match_id(url)
  59. video_list = self._download_json(
  60. 'http://cdnsel01.mediaset.net/GetCdn.aspx',
  61. video_id, 'Downloading video CDN JSON', query={
  62. 'streamid': video_id,
  63. 'format': 'json',
  64. })['videoList']
  65. formats = []
  66. for format_url in video_list:
  67. if '.ism' in format_url:
  68. formats.extend(self._extract_ism_formats(
  69. format_url, video_id, ism_id='mss', fatal=False))
  70. else:
  71. formats.append({
  72. 'url': format_url,
  73. 'format_id': determine_ext(format_url),
  74. })
  75. self._sort_formats(formats)
  76. mediainfo = self._download_json(
  77. 'http://plr.video.mediaset.it/html/metainfo.sjson',
  78. video_id, 'Downloading video info JSON', query={
  79. 'id': video_id,
  80. })['video']
  81. title = mediainfo['title']
  82. creator = try_get(
  83. mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
  84. category = try_get(
  85. mediainfo, lambda x: x['brand-info']['category'], compat_str)
  86. categories = [category] if category else None
  87. return {
  88. 'id': video_id,
  89. 'title': title,
  90. 'description': mediainfo.get('short-description'),
  91. 'thumbnail': mediainfo.get('thumbnail'),
  92. 'duration': parse_duration(mediainfo.get('duration')),
  93. 'creator': creator,
  94. 'release_date': unified_strdate(mediainfo.get('production-date')),
  95. 'webpage_url': mediainfo.get('url'),
  96. 'series': mediainfo.get('brand-value'),
  97. 'categories': categories,
  98. 'formats': formats,
  99. }