myspace.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_str,
  7. )
  8. from ..utils import ExtractorError
  9. class MySpaceIE(InfoExtractor):
  10. _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
  11. _TESTS = [
  12. {
  13. 'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
  14. 'info_dict': {
  15. 'id': '100008689',
  16. 'ext': 'flv',
  17. 'title': 'Viva La Vida',
  18. 'description': 'The official Viva La Vida video, directed by Hype Williams',
  19. 'uploader': 'Coldplay',
  20. 'uploader_id': 'coldplay',
  21. },
  22. 'params': {
  23. # rtmp download
  24. 'skip_download': True,
  25. },
  26. },
  27. # song
  28. {
  29. 'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
  30. 'info_dict': {
  31. 'id': '39008454',
  32. 'ext': 'flv',
  33. 'title': 'Darkness In My Heart',
  34. 'uploader_id': 'spiderbags',
  35. },
  36. 'params': {
  37. # rtmp download
  38. 'skip_download': True,
  39. },
  40. },
  41. ]
  42. def _real_extract(self, url):
  43. mobj = re.match(self._VALID_URL, url)
  44. video_id = mobj.group('id')
  45. webpage = self._download_webpage(url, video_id)
  46. player_url = self._search_regex(
  47. r'playerSwf":"([^"?]*)', webpage, 'player URL')
  48. if mobj.group('mediatype').startswith('music/song'):
  49. # songs don't store any useful info in the 'context' variable
  50. song_data = self._search_regex(
  51. r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
  52. webpage, 'song_data', default=None, group=0)
  53. if song_data is None:
  54. self.to_screen(
  55. '%s: No downloadable song on this page' % video_id)
  56. return
  57. def search_data(name):
  58. return self._search_regex(
  59. r'''data-%s=([\'"])(.*?)\1''' % name,
  60. song_data, name, default='', group=2)
  61. streamUrl = search_data('stream-url')
  62. if not streamUrl:
  63. vevo_id = search_data('vevo-id')
  64. youtube_id = search_data('youtube-id')
  65. if vevo_id:
  66. self.to_screen('Vevo video detected: %s' % vevo_id)
  67. return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
  68. elif youtube_id:
  69. self.to_screen('Youtube video detected: %s' % youtube_id)
  70. return self.url_result(youtube_id, ie='Youtube')
  71. else:
  72. raise ExtractorError(
  73. 'Found song but don\'t know how to download it')
  74. info = {
  75. 'id': video_id,
  76. 'title': self._og_search_title(webpage),
  77. 'uploader': search_data('artist-name'),
  78. 'uploader_id': search_data('artist-username'),
  79. 'playlist': search_data('album-title'),
  80. 'thumbnail': self._og_search_thumbnail(webpage),
  81. }
  82. else:
  83. context = json.loads(self._search_regex(
  84. r'context = ({.*?});', webpage, 'context'))
  85. video = context['video']
  86. streamUrl = video['streamUrl']
  87. info = {
  88. 'id': compat_str(video['mediaId']),
  89. 'title': video['title'],
  90. 'description': video['description'],
  91. 'thumbnail': video['imageUrl'],
  92. 'uploader': video['artistName'],
  93. 'uploader_id': video['artistUsername'],
  94. }
  95. rtmp_url, play_path = streamUrl.split(';', 1)
  96. info.update({
  97. 'url': rtmp_url,
  98. 'play_path': play_path,
  99. 'player_url': player_url,
  100. 'ext': 'flv',
  101. })
  102. return info