mediaset.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..compat import compat_str
  5. from ..utils import (
  6. determine_ext,
  7. parse_duration,
  8. try_get,
  9. unified_strdate,
  10. )
  11. class MediasetIE(InfoExtractor):
  12. _VALID_URL = r'''(?x)
  13. https?://
  14. (?:www\.)?video\.mediaset\.it/
  15. (?:
  16. (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
  17. player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
  18. )(?P<id>[0-9]+)
  19. '''
  20. _TESTS = [{
  21. # full episode
  22. 'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
  23. 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
  24. 'info_dict': {
  25. 'id': '661824',
  26. 'ext': 'mp4',
  27. 'title': 'Quarta puntata',
  28. 'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
  29. 'thumbnail': r're:^https?://.*\.jpg$',
  30. 'duration': 1414,
  31. 'creator': 'mediaset',
  32. 'release_date': '20161107',
  33. 'series': 'Hello Goodbye',
  34. 'categories': ['reality'],
  35. },
  36. 'expected_warnings': ['is not a supported codec'],
  37. }, {
  38. # clip
  39. 'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
  40. 'only_matching': True,
  41. }, {
  42. # iframe simple
  43. 'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
  44. 'only_matching': True,
  45. }, {
  46. # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
  47. 'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
  48. 'only_matching': True,
  49. }]
  50. def _real_extract(self, url):
  51. video_id = self._match_id(url)
  52. video_list = self._download_json(
  53. 'http://cdnsel01.mediaset.net/GetCdn.aspx',
  54. video_id, 'Downloading video CDN JSON', query={
  55. 'streamid': video_id,
  56. 'format': 'json',
  57. })['videoList']
  58. formats = []
  59. for format_url in video_list:
  60. if '.ism' in format_url:
  61. formats.extend(self._extract_ism_formats(
  62. format_url, video_id, ism_id='mss', fatal=False))
  63. else:
  64. formats.append({
  65. 'url': format_url,
  66. 'format_id': determine_ext(format_url),
  67. })
  68. self._sort_formats(formats)
  69. mediainfo = self._download_json(
  70. 'http://plr.video.mediaset.it/html/metainfo.sjson',
  71. video_id, 'Downloading video info JSON', query={
  72. 'id': video_id,
  73. })['video']
  74. title = mediainfo['title']
  75. creator = try_get(
  76. mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
  77. category = try_get(
  78. mediainfo, lambda x: x['brand-info']['category'], compat_str)
  79. categories = [category] if category else None
  80. return {
  81. 'id': video_id,
  82. 'title': title,
  83. 'description': mediainfo.get('short-description'),
  84. 'thumbnail': mediainfo.get('thumbnail'),
  85. 'duration': parse_duration(mediainfo.get('duration')),
  86. 'creator': creator,
  87. 'release_date': unified_strdate(mediainfo.get('production-date')),
  88. 'webpage_url': mediainfo.get('url'),
  89. 'series': mediainfo.get('brand-value'),
  90. 'categories': categories,
  91. 'formats': formats,
  92. }