myspace.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_str,
  7. )
  8. from ..utils import ExtractorError
  9. class MySpaceIE(InfoExtractor):
  10. _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
  11. _TESTS = [
  12. {
  13. 'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
  14. 'info_dict': {
  15. 'id': '100008689',
  16. 'ext': 'flv',
  17. 'title': 'Viva La Vida',
  18. 'description': 'The official Viva La Vida video, directed by Hype Williams',
  19. 'uploader': 'Coldplay',
  20. 'uploader_id': 'coldplay',
  21. },
  22. 'params': {
  23. # rtmp download
  24. 'skip_download': True,
  25. },
  26. },
  27. # songs
  28. {
  29. 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
  30. 'md5': 'f1d7323321f6b7775bf1e3754c1707dc',
  31. 'info_dict': {
  32. 'id': '93388656',
  33. 'ext': 'flv',
  34. 'playlist': 'The Demo',
  35. 'title': 'Of weakened soul...',
  36. 'uploader': 'Killsorrow',
  37. 'uploader_id': 'killsorrow',
  38. },
  39. 'params': {
  40. # rtmp download
  41. 'skip_download': True,
  42. },
  43. }, {
  44. 'add_ie': ['Vevo'],
  45. 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
  46. 'info_dict': {
  47. 'id': u'USZM20600099',
  48. 'title': u'Animal I Have Become',
  49. 'uploader': u'Three Days Grace',
  50. 'timestamp': int,
  51. },
  52. 'skip': 'VEVO is only available in some countries',
  53. }, {
  54. 'add_ie': ['Youtube'],
  55. 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
  56. 'info_dict': {
  57. 'id': 'ypWvQgnJrSU',
  58. 'title': 'Starset - First Light',
  59. 'uploader': 'Jacob Soren',
  60. 'uploader_id': 'SorenPromotions',
  61. 'upload_date': '20140725',
  62. }
  63. },
  64. ]
  65. def _real_extract(self, url):
  66. mobj = re.match(self._VALID_URL, url)
  67. video_id = mobj.group('id')
  68. webpage = self._download_webpage(url, video_id)
  69. player_url = self._search_regex(
  70. r'playerSwf":"([^"?]*)', webpage, 'player URL')
  71. if mobj.group('mediatype').startswith('music/song'):
  72. # songs don't store any useful info in the 'context' variable
  73. song_data = self._search_regex(
  74. r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
  75. webpage, 'song_data', default=None, group=0)
  76. if song_data is None:
  77. self.to_screen(
  78. '%s: No downloadable song on this page' % video_id)
  79. return
  80. def search_data(name):
  81. return self._search_regex(
  82. r'''data-%s=([\'"])(.*?)\1''' % name,
  83. song_data, name, default='', group=2)
  84. streamUrl = search_data('stream-url')
  85. if not streamUrl:
  86. vevo_id = search_data('vevo-id')
  87. youtube_id = search_data('youtube-id')
  88. if vevo_id:
  89. self.to_screen('Vevo video detected: %s' % vevo_id)
  90. return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
  91. elif youtube_id:
  92. self.to_screen('Youtube video detected: %s' % youtube_id)
  93. return self.url_result(youtube_id, ie='Youtube')
  94. else:
  95. raise ExtractorError(
  96. 'Found song but don\'t know how to download it')
  97. info = {
  98. 'id': video_id,
  99. 'title': self._og_search_title(webpage),
  100. 'uploader': search_data('artist-name'),
  101. 'uploader_id': search_data('artist-username'),
  102. 'playlist': search_data('album-title'),
  103. 'thumbnail': self._og_search_thumbnail(webpage),
  104. }
  105. else:
  106. context = json.loads(self._search_regex(
  107. r'context = ({.*?});', webpage, 'context'))
  108. video = context['video']
  109. streamUrl = video['streamUrl']
  110. info = {
  111. 'id': compat_str(video['mediaId']),
  112. 'title': video['title'],
  113. 'description': video['description'],
  114. 'thumbnail': video['imageUrl'],
  115. 'uploader': video['artistName'],
  116. 'uploader_id': video['artistUsername'],
  117. }
  118. rtmp_url, play_path = streamUrl.split(';', 1)
  119. info.update({
  120. 'url': rtmp_url,
  121. 'play_path': play_path,
  122. 'player_url': player_url,
  123. 'ext': 'flv',
  124. })
  125. return info