xiami.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. xpath_element,
  6. xpath_text,
  7. xpath_with_ns,
  8. int_or_none,
  9. ExtractorError
  10. )
  11. from ..compat import compat_urllib_parse_unquote
  12. class XiamiBaseIE(InfoExtractor):
  13. _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id'
  14. _NS_MAP = {'xm': 'http://xspf.org/ns/0/'}
  15. def _extract_track(self, track):
  16. artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='')
  17. artist = artist.split(';')
  18. ret = {
  19. 'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)),
  20. 'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)),
  21. 'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)),
  22. 'artist': ';'.join(artist) if artist else None,
  23. 'creator': artist[0] if artist else None,
  24. 'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))),
  25. 'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None),
  26. 'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))),
  27. }
  28. lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP))
  29. if lyrics_url and lyrics_url.endswith('.lrc'):
  30. ret['description'] = self._download_webpage(lyrics_url, ret['id'])
  31. return ret
  32. def _extract_xml(self, _id, typ=''):
  33. playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id)
  34. tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP))
  35. if not len(tracklist):
  36. raise ExtractorError('No track found')
  37. return [self._extract_track(track) for track in tracklist]
  38. @staticmethod
  39. def _decrypt(origin):
  40. n = int(origin[0])
  41. origin = origin[1:]
  42. short_lenth = len(origin) // n
  43. long_num = len(origin) - short_lenth * n
  44. l = tuple()
  45. for i in range(0, n):
  46. length = short_lenth
  47. if i < long_num:
  48. length += 1
  49. l += (origin[0:length], )
  50. origin = origin[length:]
  51. ans = ''
  52. for i in range(0, short_lenth + 1):
  53. for j in range(0, n):
  54. if len(l[j])>i:
  55. ans += l[j][i]
  56. return compat_urllib_parse_unquote(ans).replace('^', '0')
  57. class XiamiIE(XiamiBaseIE):
  58. IE_NAME = 'xiami:song'
  59. IE_DESC = '虾米音乐'
  60. _VALID_URL = r'http://www\.xiami\.com/song/(?P<id>[0-9]+)'
  61. _TESTS = [
  62. {
  63. 'url': 'http://www.xiami.com/song/1775610518',
  64. 'md5': '521dd6bea40fd5c9c69f913c232cb57e',
  65. 'info_dict': {
  66. 'id': '1775610518',
  67. 'ext': 'mp3',
  68. 'title': 'Woman',
  69. 'creator': 'HONNE',
  70. 'album': 'Woman',
  71. 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
  72. 'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b',
  73. }
  74. },
  75. {
  76. 'url': 'http://www.xiami.com/song/1775256504',
  77. 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
  78. 'info_dict': {
  79. 'id': '1775256504',
  80. 'ext': 'mp3',
  81. 'title': '悟空',
  82. 'creator': '戴荃',
  83. 'album': '悟空',
  84. 'description': 'md5:206e67e84f9bed1d473d04196a00b990',
  85. }
  86. },
  87. ]
  88. def _real_extract(self, url):
  89. _id = self._match_id(url)
  90. return self._extract_xml(_id)[0]
  91. class XiamiAlbumIE(XiamiBaseIE):
  92. IE_NAME = 'xiami:album'
  93. IE_DESC = '虾米音乐 - 专辑'
  94. _VALID_URL = r'http://www\.xiami\.com/album/(?P<id>[0-9]+)'
  95. _TESTS = [
  96. {
  97. 'url': 'http://www.xiami.com/album/2100300444',
  98. 'info_dict': {
  99. 'id': '2100300444',
  100. },
  101. 'playlist_count': 10,
  102. },
  103. {
  104. 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
  105. 'only_matching': True,
  106. }
  107. ]
  108. def _real_extract(self, url):
  109. _id = self._match_id(url)
  110. return self.playlist_result(self._extract_xml(_id, '/type/1'), _id)
  111. class XiamiArtistIE(XiamiBaseIE):
  112. IE_NAME = 'xiami:artist'
  113. IE_DESC = '虾米音乐 - 歌手'
  114. _VALID_URL = r'http://www\.xiami\.com/artist/(?P<id>[0-9]+)'
  115. _TEST = {
  116. 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
  117. 'info_dict': {
  118. 'id': '2132',
  119. },
  120. 'playlist_count': 20,
  121. }
  122. def _real_extract(self, url):
  123. _id = self._match_id(url)
  124. return self.playlist_result(self._extract_xml(_id, '/type/2'), _id)
  125. class XiamiCollectionIE(XiamiBaseIE):
  126. IE_NAME = 'xiami:collection'
  127. IE_DESC = '虾米音乐 - 精选集'
  128. _VALID_URL = r'http://www\.xiami\.com/collect/(?P<id>[0-9]+)'
  129. _TEST = {
  130. 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
  131. 'info_dict': {
  132. 'id': '156527391',
  133. },
  134. 'playlist_count': 26,
  135. }
  136. def _real_extract(self, url):
  137. _id = self._match_id(url)
  138. return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)