ccma.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import datetime
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. clean_html,
  8. int_or_none,
  9. parse_duration,
  10. parse_resolution,
  11. try_get,
  12. url_or_none,
  13. )
  14. class CCMAIE(InfoExtractor):
  15. _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
  16. _TESTS = [{
  17. 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
  18. 'md5': '7296ca43977c8ea4469e719c609b0871',
  19. 'info_dict': {
  20. 'id': '5630208',
  21. 'ext': 'mp4',
  22. 'title': 'L\'espot de La Marató de TV3',
  23. 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
  24. 'timestamp': 1478608140,
  25. 'upload_date': '20161108',
  26. 'age_limit': 0,
  27. }
  28. }, {
  29. 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
  30. 'md5': 'fa3e38f269329a278271276330261425',
  31. 'info_dict': {
  32. 'id': '943685',
  33. 'ext': 'mp3',
  34. 'title': 'El Consell de Savis analitza el derbi',
  35. 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
  36. 'upload_date': '20170512',
  37. 'timestamp': 1494622500,
  38. 'vcodec': 'none',
  39. 'categories': ['Esports'],
  40. }
  41. }, {
  42. 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
  43. 'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
  44. 'info_dict': {
  45. 'id': '6031387',
  46. 'ext': 'mp4',
  47. 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
  48. 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
  49. 'timestamp': 1582577700,
  50. 'upload_date': '20200224',
  51. 'subtitles': 'mincount:4',
  52. 'age_limit': 16,
  53. 'series': 'Crims',
  54. }
  55. }]
  56. def _real_extract(self, url):
  57. media_type, media_id = re.match(self._VALID_URL, url).groups()
  58. media = self._download_json(
  59. 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
  60. 'media': media_type,
  61. 'idint': media_id,
  62. })
  63. formats = []
  64. media_url = media['media']['url']
  65. if isinstance(media_url, list):
  66. for format_ in media_url:
  67. format_url = url_or_none(format_.get('file'))
  68. if not format_url:
  69. continue
  70. label = format_.get('label')
  71. f = parse_resolution(label)
  72. f.update({
  73. 'url': format_url,
  74. 'format_id': label,
  75. })
  76. formats.append(f)
  77. else:
  78. formats.append({
  79. 'url': media_url,
  80. 'vcodec': 'none' if media_type == 'audio' else None,
  81. })
  82. self._sort_formats(formats)
  83. informacio = media['informacio']
  84. title = informacio['titol']
  85. durada = informacio.get('durada') or {}
  86. duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
  87. tematica = try_get(informacio, lambda x: x['tematica']['text'])
  88. timestamp = None
  89. data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
  90. try:
  91. timestamp = datetime.datetime.strptime(
  92. data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
  93. except TypeError:
  94. pass
  95. subtitles = {}
  96. subtitols = media.get('subtitols') or []
  97. if isinstance(subtitols, dict):
  98. subtitols = [subtitols]
  99. for st in subtitols:
  100. sub_url = st.get('url')
  101. if sub_url:
  102. subtitles.setdefault(
  103. st.get('iso') or st.get('text') or 'ca', []).append({
  104. 'url': sub_url,
  105. })
  106. thumbnails = []
  107. imatges = media.get('imatges', {})
  108. if imatges:
  109. thumbnail_url = imatges.get('url')
  110. if thumbnail_url:
  111. thumbnails = [{
  112. 'url': thumbnail_url,
  113. 'width': int_or_none(imatges.get('amplada')),
  114. 'height': int_or_none(imatges.get('alcada')),
  115. }]
  116. age_limit = None
  117. codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
  118. if codi_etic:
  119. codi_etic_s = codi_etic.split('_')
  120. if len(codi_etic_s) == 2:
  121. if codi_etic_s[1] == 'TP':
  122. age_limit = 0
  123. else:
  124. age_limit = int_or_none(codi_etic_s[1])
  125. return {
  126. 'id': media_id,
  127. 'title': title,
  128. 'description': clean_html(informacio.get('descripcio')),
  129. 'duration': duration,
  130. 'timestamp': timestamp,
  131. 'thumbnails': thumbnails,
  132. 'subtitles': subtitles,
  133. 'formats': formats,
  134. 'age_limit': age_limit,
  135. 'alt_title': informacio.get('titol_complet'),
  136. 'episode_number': int_or_none(informacio.get('capitol')),
  137. 'categories': [tematica] if tematica else None,
  138. 'series': informacio.get('programa'),
  139. }