bbccouk.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import ExtractorError
  class BBCCoUkIE(InfoExtractor):
      """Extractor for bbc.co.uk programme and iPlayer episode pages.

      The 8-character id taken from the page URL is used to download the
      iPlayer playlist XML.  The ``identifier`` attribute of the playlist
      item is then used to download the media selector XML, from which one
      format entry is built per direct HTTP link, per ASX playlist reference
      and per RTMP stream.
      """

      IE_NAME = 'bbc.co.uk'
      IE_DESC = 'BBC - iPlayer Radio'
      _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(programmes|iplayer/episode)/(?P<id>[\da-z]{8})'

      # XML namespaces (in ElementTree "{uri}" form) of the two documents
      # this extractor downloads.
      _PLAYLIST_NS = '{http://bbc.co.uk/2008/emp/playlist}'
      _MEDIASELECTION_NS = '{http://bbc.co.uk/2008/mp/mediaselection}'

      _TEST = {
          'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
          'info_dict': {
              'id': 'p01q7wz4',
              'ext': 'flv',
              'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
              'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
              'duration': 1936,
          },
          'params': {
              # rtmp download
              'skip_download': True,
          }
      }

      @staticmethod
      def _int_or_none(value):
          """Return ``int(value)``, or None when value is missing or not numeric."""
          try:
              return int(value)
          except (TypeError, ValueError):
              return None

      @staticmethod
      def _element_text(parent, path):
          """Return the text of the first element matching path, or None if absent."""
          node = parent.find(path)
          return None if node is None else node.text

      def _unavailable_error(self, playlist, group_id):
          """Build the ExtractorError describing why the playlist has no item.

          The ``reason`` attribute of the playlist's ``noItems`` element, when
          that element is present, selects the message; otherwise a generic
          failure message is used.  The error is returned, not raised.
          """
          no_items = playlist.find('./%snoItems' % self._PLAYLIST_NS)
          if no_items is None:
              return ExtractorError(
                  'Failed to extract media for episode %s' % group_id, expected=True)

          reason = no_items.get('reason')
          if reason == 'preAvailability':
              msg = 'Episode %s is not yet available' % group_id
          elif reason == 'postAvailability':
              msg = 'Episode %s is no longer available' % group_id
          else:
              msg = 'Episode %s is not available: %s' % (group_id, reason)
          return ExtractorError(msg, expected=True)

      def _extract_formats(self, media_selection, programme_id):
          """Return the list of format dicts described by the media selector XML.

          Only the first ``connection`` child of each ``media`` element is
          used.  Media elements without a connection, and connections whose
          protocol is neither ``http`` nor ``rtmp``, produce no format.
          """
          ns = self._MEDIASELECTION_NS
          formats = []
          for media in media_selection.findall('./%smedia' % ns):
              connection = media.find('./%sconnection' % ns)
              if connection is None:
                  # Nothing to download for this entry.
                  continue

              service = media.get('service')
              # Fields shared by every format built from this media element.
              # bitrate and priority are parsed leniently so that an entry
              # lacking either attribute does not abort the whole extraction;
              # priority is converted to a number so that 'preference' is
              # not compared as text.
              shared = {
                  'abr': self._int_or_none(media.get('bitrate')),
                  'acodec': media.get('encoding'),
                  'preference': self._int_or_none(connection.get('priority')),
              }

              protocol = connection.get('protocol')
              if protocol == 'http':
                  href = connection.get('href')
                  if connection.get('supplier') == 'asx':
                      # ASX playlist: one format per referenced stream
                      asx = self._download_xml(
                          href, programme_id,
                          'Downloading %s ASX playlist' % service)
                      for i, ref in enumerate(asx.findall('./Entry/ref')):
                          fmt = dict(shared)
                          fmt.update({
                              'url': ref.get('href'),
                              'format_id': '%s_ref%s' % (service, i),
                          })
                          formats.append(fmt)
                  else:
                      # Direct link
                      fmt = dict(shared)
                      fmt.update({
                          'url': href,
                          'format_id': service,
                      })
                      formats.append(fmt)
              elif protocol == 'rtmp':
                  application = connection.get('application', 'ondemand')
                  auth_string = connection.get('authString')
                  # Only append the query part when an auth string exists,
                  # so a missing attribute does not end up as "?None".
                  if auth_string:
                      app = '%s?%s' % (application, auth_string)
                  else:
                      app = application
                  fmt = dict(shared)
                  fmt.update({
                      'url': '%s://%s/%s' % (protocol, connection.get('server'), app),
                      'play_path': connection.get('identifier'),
                      'app': app,
                      'rtmp_live': False,
                      'ext': 'flv',
                      'format_id': service,
                  })
                  formats.append(fmt)
          return formats

      def _real_extract(self, url):
          """Extract id, title, description, duration and formats for url.

          Raises ExtractorError when the playlist contains no item (episode
          not yet, no longer, or otherwise not available) or when the
          playlist has no usable title.
          """
          mobj = re.match(self._VALID_URL, url)
          group_id = mobj.group('id')

          playlist = self._download_xml(
              'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
              'Downloading playlist XML')

          ns = self._PLAYLIST_NS
          item = playlist.find('./%sitem' % ns)
          if item is None:
              raise self._unavailable_error(playlist, group_id)

          title = self._element_text(playlist, './%stitle' % ns)
          if title is None:
              raise ExtractorError(
                  'Unable to extract title for episode %s' % group_id)
          # The summary is optional: a playlist without one yields no description.
          description = self._element_text(playlist, './%ssummary' % ns)

          radio_programme_id = item.get('identifier')
          duration = self._int_or_none(item.get('duration'))

          media_selection = self._download_xml(
              'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % radio_programme_id,
              radio_programme_id, 'Downloading media selection XML')

          formats = self._extract_formats(media_selection, radio_programme_id)
          self._sort_formats(formats)

          return {
              'id': radio_programme_id,
              'title': title,
              'description': description,
              'duration': duration,
              'formats': formats,
          }