neteasemusic.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from hashlib import md5
  4. from base64 import b64encode
  5. from datetime import datetime
  6. import itertools
  7. import re
  8. from .common import InfoExtractor
  9. from ..compat import (
  10. compat_urllib_request,
  11. compat_urllib_parse,
  12. compat_str,
  13. )
  14. class NetEaseMusicBaseIE(InfoExtractor):
  15. _FORMATS = ['bMusic', 'mMusic', 'hMusic']
  16. _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
  17. _API_BASE = 'http://music.163.com/api/'
  18. @classmethod
  19. def _encrypt(cls, dfsid):
  20. salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
  21. string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
  22. salt_len = len(salt_bytes)
  23. for i in range(len(string_bytes)):
  24. string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
  25. m = md5()
  26. m.update(bytes(string_bytes))
  27. result = b64encode(m.digest()).decode('ascii')
  28. return result.replace('/', '_').replace('+', '-')
  29. @classmethod
  30. def extract_formats(cls, info):
  31. formats = []
  32. for song_format in cls._FORMATS:
  33. details = info.get(song_format)
  34. if not details:
  35. continue
  36. formats.append({
  37. 'url': 'http://m1.music.126.net/%s/%s.%s' %
  38. (cls._encrypt(details['dfsId']), details['dfsId'],
  39. details['extension']),
  40. 'ext': details.get('extension'),
  41. 'abr': details.get('bitrate', 0) / 1000,
  42. 'format_id': song_format,
  43. 'filesize': details.get('size'),
  44. 'asr': details.get('sr')
  45. })
  46. return formats
  47. @classmethod
  48. def convert_milliseconds(cls, ms):
  49. return int(round(ms / 1000.0))
  50. def query_api(self, endpoint, video_id, note):
  51. req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
  52. req.add_header('Referer', self._API_BASE)
  53. return self._download_json(req, video_id, note)
  54. class NetEaseMusicIE(NetEaseMusicBaseIE):
  55. IE_NAME = 'netease:song'
  56. _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
  57. _TESTS = [{
  58. 'url': 'http://music.163.com/#/song?id=32102397',
  59. 'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
  60. 'info_dict': {
  61. 'id': '32102397',
  62. 'ext': 'mp3',
  63. 'title': 'Bad Blood (feat. Kendrick Lamar)',
  64. 'creator': 'Taylor Swift / Kendrick Lamar',
  65. 'upload_date': '20150517',
  66. 'timestamp': 1431878400,
  67. 'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
  68. },
  69. }, {
  70. 'note': 'No lyrics translation.',
  71. 'url': 'http://music.163.com/#/song?id=29822014',
  72. 'info_dict': {
  73. 'id': '29822014',
  74. 'ext': 'mp3',
  75. 'title': '听见下雨的声音',
  76. 'creator': '周杰伦',
  77. 'upload_date': '20141225',
  78. 'timestamp': 1419523200,
  79. 'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
  80. },
  81. }, {
  82. 'note': 'No lyrics.',
  83. 'url': 'http://music.163.com/song?id=17241424',
  84. 'info_dict': {
  85. 'id': '17241424',
  86. 'ext': 'mp3',
  87. 'title': 'Opus 28',
  88. 'creator': 'Dustin O\'Halloran',
  89. 'upload_date': '20080211',
  90. 'timestamp': 1202745600,
  91. },
  92. }, {
  93. 'note': 'Has translated name.',
  94. 'url': 'http://music.163.com/#/song?id=22735043',
  95. 'info_dict': {
  96. 'id': '22735043',
  97. 'ext': 'mp3',
  98. 'title': '소원을 말해봐 (Genie)',
  99. 'creator': '少女时代',
  100. 'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
  101. 'upload_date': '20100127',
  102. 'timestamp': 1264608000,
  103. 'alt_title': '说出愿望吧(Genie)',
  104. }
  105. }]
  106. def _process_lyrics(self, lyrics_info):
  107. original = lyrics_info.get('lrc', {}).get('lyric')
  108. translated = lyrics_info.get('tlyric', {}).get('lyric')
  109. if not translated:
  110. return original
  111. lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
  112. original_ts_texts = re.findall(lyrics_expr, original)
  113. translation_ts_dict = dict(
  114. (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
  115. )
  116. lyrics = '\n'.join([
  117. '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
  118. for time_stamp, text in original_ts_texts
  119. ])
  120. return lyrics
  121. def _real_extract(self, url):
  122. song_id = self._match_id(url)
  123. params = {
  124. 'id': song_id,
  125. 'ids': '[%s]' % song_id
  126. }
  127. info = self.query_api(
  128. 'song/detail?' + compat_urllib_parse.urlencode(params),
  129. song_id, 'Downloading song info')['songs'][0]
  130. formats = self.extract_formats(info)
  131. self._sort_formats(formats)
  132. lyrics_info = self.query_api(
  133. 'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
  134. song_id, 'Downloading lyrics data')
  135. lyrics = self._process_lyrics(lyrics_info)
  136. alt_title = None
  137. if info.get('transNames'):
  138. alt_title = '/'.join(info.get('transNames'))
  139. return {
  140. 'id': song_id,
  141. 'title': info['name'],
  142. 'alt_title': alt_title,
  143. 'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
  144. 'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
  145. 'thumbnail': info.get('album', {}).get('picUrl'),
  146. 'duration': self.convert_milliseconds(info.get('duration', 0)),
  147. 'description': lyrics,
  148. 'formats': formats,
  149. }
  150. class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
  151. IE_NAME = 'netease:album'
  152. _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
  153. _TEST = {
  154. 'url': 'http://music.163.com/#/album?id=220780',
  155. 'info_dict': {
  156. 'id': '220780',
  157. 'title': 'B\'day',
  158. },
  159. 'playlist_count': 23,
  160. }
  161. def _real_extract(self, url):
  162. album_id = self._match_id(url)
  163. info = self.query_api(
  164. 'album/%s?id=%s' % (album_id, album_id),
  165. album_id, 'Downloading album data')['album']
  166. name = info['name']
  167. desc = info.get('description')
  168. entries = [
  169. self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  170. 'NetEaseMusic', song['id'])
  171. for song in info['songs']
  172. ]
  173. return self.playlist_result(entries, album_id, name, desc)
  174. class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
  175. IE_NAME = 'netease:singer'
  176. _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
  177. _TESTS = [{
  178. 'note': 'Singer has aliases.',
  179. 'url': 'http://music.163.com/#/artist?id=10559',
  180. 'info_dict': {
  181. 'id': '10559',
  182. 'title': '张惠妹 - aMEI;阿密特',
  183. },
  184. 'playlist_count': 50,
  185. }, {
  186. 'note': 'Singer has translated name.',
  187. 'url': 'http://music.163.com/#/artist?id=124098',
  188. 'info_dict': {
  189. 'id': '124098',
  190. 'title': '李昇基 - 이승기',
  191. },
  192. 'playlist_count': 50,
  193. }]
  194. def _real_extract(self, url):
  195. singer_id = self._match_id(url)
  196. info = self.query_api(
  197. 'artist/%s?id=%s' % (singer_id, singer_id),
  198. singer_id, 'Downloading singer data')
  199. name = info['artist']['name']
  200. if info['artist']['trans']:
  201. name = '%s - %s' % (name, info['artist']['trans'])
  202. if info['artist']['alias']:
  203. name = '%s - %s' % (name, ";".join(info['artist']['alias']))
  204. entries = [
  205. self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  206. 'NetEaseMusic', song['id'])
  207. for song in info['hotSongs']
  208. ]
  209. return self.playlist_result(entries, singer_id, name)
  210. class NetEaseMusicListIE(NetEaseMusicBaseIE):
  211. IE_NAME = 'netease:playlist'
  212. _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
  213. _TESTS = [{
  214. 'url': 'http://music.163.com/#/playlist?id=79177352',
  215. 'info_dict': {
  216. 'id': '79177352',
  217. 'title': 'Billboard 2007 Top 100',
  218. 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
  219. },
  220. 'playlist_count': 99,
  221. }, {
  222. 'note': 'Toplist/Charts sample',
  223. 'url': 'http://music.163.com/#/discover/toplist?id=3733003',
  224. 'info_dict': {
  225. 'id': '3733003',
  226. 'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
  227. 'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
  228. },
  229. 'playlist_count': 50,
  230. }]
  231. def _real_extract(self, url):
  232. list_id = self._match_id(url)
  233. info = self.query_api(
  234. 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
  235. list_id, 'Downloading playlist data')['result']
  236. name = info['name']
  237. desc = info.get('description')
  238. if info.get('specialType') == 10: # is a chart/toplist
  239. datestamp = datetime.fromtimestamp(
  240. self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
  241. name = '%s %s' % (name, datestamp)
  242. entries = [
  243. self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  244. 'NetEaseMusic', song['id'])
  245. for song in info['tracks']
  246. ]
  247. return self.playlist_result(entries, list_id, name, desc)
  248. class NetEaseMusicMvIE(NetEaseMusicBaseIE):
  249. IE_NAME = 'netease:mv'
  250. _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
  251. _TEST = {
  252. 'url': 'http://music.163.com/#/mv?id=415350',
  253. 'info_dict': {
  254. 'id': '415350',
  255. 'ext': 'mp4',
  256. 'title': '이럴거면 그러지말지',
  257. 'description': '白雅言自作曲唱甜蜜爱情',
  258. 'creator': '白雅言',
  259. 'upload_date': '20150520',
  260. },
  261. }
  262. def _real_extract(self, url):
  263. mv_id = self._match_id(url)
  264. info = self.query_api(
  265. 'mv/detail?id=%s&type=mp4' % mv_id,
  266. mv_id, 'Downloading mv info')['data']
  267. formats = [
  268. {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
  269. for brs, mv_url in info['brs'].items()
  270. ]
  271. self._sort_formats(formats)
  272. return {
  273. 'id': mv_id,
  274. 'title': info['name'],
  275. 'description': info.get('desc') or info.get('briefDesc'),
  276. 'creator': info['artistName'],
  277. 'upload_date': info['publishTime'].replace('-', ''),
  278. 'formats': formats,
  279. 'thumbnail': info.get('cover'),
  280. 'duration': self.convert_milliseconds(info.get('duration', 0)),
  281. }
  282. class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
  283. IE_NAME = 'netease:program'
  284. _VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
  285. _TESTS = [{
  286. 'url': 'http://music.163.com/#/program?id=10109055',
  287. 'info_dict': {
  288. 'id': '10109055',
  289. 'ext': 'mp3',
  290. 'title': '不丹足球背后的故事',
  291. 'description': '喜马拉雅人的足球梦 ...',
  292. 'creator': '大话西藏',
  293. 'timestamp': 1434179342,
  294. 'upload_date': '20150613',
  295. 'duration': 900,
  296. },
  297. }, {
  298. 'note': 'This program has accompanying songs.',
  299. 'url': 'http://music.163.com/#/program?id=10141022',
  300. 'info_dict': {
  301. 'id': '10141022',
  302. 'title': '25岁,你是自在如风的少年<27°C>',
  303. 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
  304. },
  305. 'playlist_count': 4,
  306. }, {
  307. 'note': 'This program has accompanying songs.',
  308. 'url': 'http://music.163.com/#/program?id=10141022',
  309. 'info_dict': {
  310. 'id': '10141022',
  311. 'ext': 'mp3',
  312. 'title': '25岁,你是自在如风的少年<27°C>',
  313. 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
  314. 'timestamp': 1434450841,
  315. 'upload_date': '20150616',
  316. },
  317. 'params': {
  318. 'noplaylist': True
  319. }
  320. }]
  321. def _real_extract(self, url):
  322. program_id = self._match_id(url)
  323. info = self.query_api(
  324. 'dj/program/detail?id=%s' % program_id,
  325. program_id, 'Downloading program info')['program']
  326. name = info['name']
  327. description = info['description']
  328. if not info['songs'] or self._downloader.params.get('noplaylist'):
  329. if info['songs']:
  330. self.to_screen(
  331. 'Downloading just the main audio %s because of --no-playlist'
  332. % info['mainSong']['id'])
  333. formats = self.extract_formats(info['mainSong'])
  334. self._sort_formats(formats)
  335. return {
  336. 'id': program_id,
  337. 'title': name,
  338. 'description': description,
  339. 'creator': info['dj']['brand'],
  340. 'timestamp': self.convert_milliseconds(info['createTime']),
  341. 'thumbnail': info['coverUrl'],
  342. 'duration': self.convert_milliseconds(info.get('duration', 0)),
  343. 'formats': formats,
  344. }
  345. self.to_screen(
  346. 'Downloading playlist %s - add --no-playlist to just download the main audio %s'
  347. % (program_id, info['mainSong']['id']))
  348. song_ids = [info['mainSong']['id']]
  349. song_ids.extend([song['id'] for song in info['songs']])
  350. entries = [
  351. self.url_result('http://music.163.com/#/song?id=%s' % song_id,
  352. 'NetEaseMusic', song_id)
  353. for song_id in song_ids
  354. ]
  355. return self.playlist_result(entries, program_id, name, description)
  356. class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
  357. IE_NAME = 'netease:djradio'
  358. _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
  359. _TEST = {
  360. 'url': 'http://music.163.com/#/djradio?id=42',
  361. 'info_dict': {
  362. 'id': '42',
  363. 'title': '声音蔓延',
  364. 'description': 'md5:766220985cbd16fdd552f64c578a6b15'
  365. },
  366. 'playlist_mincount': 40,
  367. }
  368. _PAGE_SIZE = 1000
  369. def _real_extract(self, url):
  370. dj_id = self._match_id(url)
  371. name = None
  372. desc = None
  373. entries = []
  374. for offset in itertools.count(start=0, step=self._PAGE_SIZE):
  375. info = self.query_api(
  376. 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
  377. % (self._PAGE_SIZE, dj_id, offset),
  378. dj_id, 'Downloading dj programs - %d' % offset)
  379. entries.extend([
  380. self.url_result(
  381. 'http://music.163.com/#/program?id=%s' % program['id'],
  382. 'NetEaseMusicProgram', program['id'])
  383. for program in info['programs']
  384. ])
  385. if name is None:
  386. radio = info['programs'][0]['radio']
  387. name = radio['name']
  388. desc = radio['desc']
  389. if not info['more']:
  390. break
  391. return self.playlist_result(entries, dj_id, name, desc)