thesixtyone.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import unified_strdate
  7. class TheSixtyOneIE(InfoExtractor):
  8. _VALID_URL = r'''(?x)https?://(?:www\.)?thesixtyone\.com/
  9. (?:.*?/)*
  10. (?:
  11. s|
  12. song/comments/list|
  13. song
  14. )/(?P<id>[A-Za-z0-9]+)/?$'''
  15. _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
  16. _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
  17. _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
  18. _TESTS = [
  19. {
  20. 'url': 'http://www.thesixtyone.com/s/SrE3zD7s1jt/',
  21. 'md5': '821cc43b0530d3222e3e2b70bb4622ea',
  22. 'info_dict': {
  23. 'id': 'SrE3zD7s1jt',
  24. 'ext': 'mp3',
  25. 'title': 'CASIO - Unicorn War Mixtape',
  26. 'thumbnail': 're:^https?://.*_desktop$',
  27. 'upload_date': '20071217',
  28. 'duration': 3208,
  29. }
  30. },
  31. {
  32. 'url': 'http://www.thesixtyone.com/song/comments/list/SrE3zD7s1jt',
  33. 'only_matching': True,
  34. },
  35. {
  36. 'url': 'http://www.thesixtyone.com/s/ULoiyjuJWli#/s/SrE3zD7s1jt/',
  37. 'only_matching': True,
  38. },
  39. {
  40. 'url': 'http://www.thesixtyone.com/#/s/SrE3zD7s1jt/',
  41. 'only_matching': True,
  42. },
  43. {
  44. 'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
  45. 'only_matching': True,
  46. },
  47. ]
  48. _DECODE_MAP = {
  49. "x": "a",
  50. "m": "b",
  51. "w": "c",
  52. "q": "d",
  53. "n": "e",
  54. "p": "f",
  55. "a": "0",
  56. "h": "1",
  57. "e": "2",
  58. "u": "3",
  59. "s": "4",
  60. "i": "5",
  61. "o": "6",
  62. "y": "7",
  63. "r": "8",
  64. "c": "9"
  65. }
  66. def _real_extract(self, url):
  67. mobj = re.match(self._VALID_URL, url)
  68. song_id = mobj.group('id')
  69. webpage = self._download_webpage(
  70. self._SONG_URL_TEMPLATE.format(song_id), song_id)
  71. song_data = json.loads(self._search_regex(
  72. r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'))
  73. if self._search_regex(r'(t61\.s3_audio_load\s*=\s*1\.0;)', webpage, 's3_audio_load marker', default=None):
  74. song_data['audio_server'] = 's3.amazonaws.com'
  75. else:
  76. song_data['audio_server'] = song_data['audio_server'] + '.thesixtyone.com'
  77. keys = [self._DECODE_MAP.get(s, s) for s in song_data['key']]
  78. url = self._SONG_FILE_URL_TEMPLATE.format(
  79. "".join(reversed(keys)), **song_data)
  80. formats = [{
  81. 'format_id': 'sd',
  82. 'url': url,
  83. 'ext': 'mp3',
  84. }]
  85. return {
  86. 'id': song_id,
  87. 'title': '{artist:} - {name:}'.format(**song_data),
  88. 'formats': formats,
  89. 'comment_count': song_data.get('comments_count'),
  90. 'duration': song_data.get('play_time'),
  91. 'like_count': song_data.get('score'),
  92. 'thumbnail': self._THUMBNAIL_URL_TEMPLATE.format(**song_data),
  93. 'upload_date': unified_strdate(song_data.get('publish_date')),
  94. }