|
@@ -1,38 +1,26 @@
|
|
|
# coding: utf-8
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
-import re
|
|
|
+import hashlib
|
|
|
+import random
|
|
|
|
|
|
-from ..compat import compat_urlparse
|
|
|
+from ..compat import compat_str
|
|
|
from .common import InfoExtractor
|
|
|
-from ..utils import parse_duration
|
|
|
-
|
|
|
-
|
|
|
-class JamendoBaseIE(InfoExtractor):
|
|
|
- def _extract_meta(self, webpage, fatal=True):
|
|
|
- title = self._og_search_title(
|
|
|
- webpage, default=None) or self._search_regex(
|
|
|
- r'<title>([^<]+)', webpage,
|
|
|
- 'title', default=None)
|
|
|
- if title:
|
|
|
- title = self._search_regex(
|
|
|
- r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
|
|
|
- if not title:
|
|
|
- title = self._html_search_meta(
|
|
|
- 'name', webpage, 'title', fatal=fatal)
|
|
|
- mobj = re.search(r'(.+) - (.+)', title or '')
|
|
|
- artist, second = mobj.groups() if mobj else [None] * 2
|
|
|
- return title, artist, second
|
|
|
-
|
|
|
-
|
|
|
-class JamendoIE(JamendoBaseIE):
|
|
|
+from ..utils import (
|
|
|
+ clean_html,
|
|
|
+ int_or_none,
|
|
|
+ try_get,
|
|
|
+)
|
|
|
+
|
|
|
+
|
|
|
+class JamendoIE(InfoExtractor):
|
|
|
_VALID_URL = r'''(?x)
|
|
|
https?://
|
|
|
(?:
|
|
|
licensing\.jamendo\.com/[^/]+|
|
|
|
(?:www\.)?jamendo\.com
|
|
|
)
|
|
|
- /track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)
|
|
|
+ /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
|
|
|
'''
|
|
|
_TESTS = [{
|
|
|
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
|
@@ -45,7 +33,9 @@ class JamendoIE(JamendoBaseIE):
|
|
|
'artist': 'Maya Filipič',
|
|
|
'track': 'Stories from Emona I',
|
|
|
'duration': 210,
|
|
|
- 'thumbnail': r're:^https?://.*\.jpg'
|
|
|
+ 'thumbnail': r're:^https?://.*\.jpg',
|
|
|
+ 'timestamp': 1217438117,
|
|
|
+ 'upload_date': '20080730',
|
|
|
}
|
|
|
}, {
|
|
|
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
|
@@ -53,15 +43,19 @@ class JamendoIE(JamendoBaseIE):
|
|
|
}]
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
- mobj = self._VALID_URL_RE.match(url)
|
|
|
- track_id = mobj.group('id')
|
|
|
- display_id = mobj.group('display_id')
|
|
|
-
|
|
|
- webpage = self._download_webpage(
|
|
|
- 'https://www.jamendo.com/track/%s/%s' % (track_id, display_id),
|
|
|
- display_id)
|
|
|
-
|
|
|
- title, artist, track = self._extract_meta(webpage)
|
|
|
+ track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
|
|
+ webpage = self._download_webpage(url, track_id)
|
|
|
+ models = self._parse_json(self._html_search_regex(
|
|
|
+ r"data-bundled-models='([^']+)",
|
|
|
+ webpage, 'bundled models'), track_id)
|
|
|
+ track = models['track']['models'][0]
|
|
|
+ title = track_name = track['name']
|
|
|
+ get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
|
|
+ artist = get_model('artist')
|
|
|
+ artist_name = artist.get('name')
|
|
|
+ if artist_name:
|
|
|
+ title = '%s - %s' % (artist_name, title)
|
|
|
+ album = get_model('album')
|
|
|
|
|
|
formats = [{
|
|
|
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
|
@@ -77,31 +71,58 @@ class JamendoIE(JamendoBaseIE):
|
|
|
))]
|
|
|
self._sort_formats(formats)
|
|
|
|
|
|
- thumbnail = self._html_search_meta(
|
|
|
- 'image', webpage, 'thumbnail', fatal=False)
|
|
|
- duration = parse_duration(self._search_regex(
|
|
|
- r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
|
|
|
- webpage, 'duration', fatal=False))
|
|
|
+ urls = []
|
|
|
+ thumbnails = []
|
|
|
+ for _, covers in track.get('cover', {}).items():
|
|
|
+ for cover_id, cover_url in covers.items():
|
|
|
+ if not cover_url or cover_url in urls:
|
|
|
+ continue
|
|
|
+ urls.append(cover_url)
|
|
|
+ size = int_or_none(cover_id.lstrip('size'))
|
|
|
+ thumbnails.append({
|
|
|
+ 'id': cover_id,
|
|
|
+ 'url': cover_url,
|
|
|
+ 'width': size,
|
|
|
+ 'height': size,
|
|
|
+ })
|
|
|
+
|
|
|
+ tags = []
|
|
|
+ for tag in track.get('tags', []):
|
|
|
+ tag_name = tag.get('name')
|
|
|
+ if not tag_name:
|
|
|
+ continue
|
|
|
+ tags.append(tag_name)
|
|
|
+
|
|
|
+ stats = track.get('stats') or {}
|
|
|
|
|
|
return {
|
|
|
'id': track_id,
|
|
|
'display_id': display_id,
|
|
|
- 'thumbnail': thumbnail,
|
|
|
+ 'thumbnails': thumbnails,
|
|
|
'title': title,
|
|
|
- 'duration': duration,
|
|
|
- 'artist': artist,
|
|
|
- 'track': track,
|
|
|
- 'formats': formats
|
|
|
+ 'description': track.get('description'),
|
|
|
+ 'duration': int_or_none(track.get('duration')),
|
|
|
+ 'artist': artist_name,
|
|
|
+ 'track': track_name,
|
|
|
+ 'album': album.get('name'),
|
|
|
+ 'formats': formats,
|
|
|
+ 'license': '-'.join(track.get('licenseCC', [])) or None,
|
|
|
+ 'timestamp': int_or_none(track.get('dateCreated')),
|
|
|
+ 'view_count': int_or_none(stats.get('listenedAll')),
|
|
|
+ 'like_count': int_or_none(stats.get('favorited')),
|
|
|
+ 'average_rating': int_or_none(stats.get('averageNote')),
|
|
|
+ 'tags': tags,
|
|
|
}
|
|
|
|
|
|
|
|
|
-class JamendoAlbumIE(JamendoBaseIE):
|
|
|
- _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
|
|
|
+class JamendoAlbumIE(InfoExtractor):
|
|
|
+ _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
|
|
_TEST = {
|
|
|
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
|
|
'info_dict': {
|
|
|
'id': '121486',
|
|
|
- 'title': 'Shearer - Duck On Cover'
|
|
|
+ 'title': 'Duck On Cover',
|
|
|
+ 'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
|
|
|
},
|
|
|
'playlist': [{
|
|
|
'md5': 'e1a2fcb42bda30dfac990212924149a8',
|
|
@@ -111,6 +132,8 @@ class JamendoAlbumIE(JamendoBaseIE):
|
|
|
'title': 'Shearer - Warmachine',
|
|
|
'artist': 'Shearer',
|
|
|
'track': 'Warmachine',
|
|
|
+ 'timestamp': 1368089771,
|
|
|
+ 'upload_date': '20130509',
|
|
|
}
|
|
|
}, {
|
|
|
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
|
|
@@ -120,6 +143,8 @@ class JamendoAlbumIE(JamendoBaseIE):
|
|
|
'title': 'Shearer - Without Your Ghost',
|
|
|
'artist': 'Shearer',
|
|
|
'track': 'Without Your Ghost',
|
|
|
+ 'timestamp': 1368089771,
|
|
|
+ 'upload_date': '20130509',
|
|
|
}
|
|
|
}],
|
|
|
'params': {
|
|
@@ -127,24 +152,35 @@ class JamendoAlbumIE(JamendoBaseIE):
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ def _call_api(self, resource, resource_id):
|
|
|
+ path = '/api/%ss' % resource
|
|
|
+ rand = compat_str(random.random())
|
|
|
+ return self._download_json(
|
|
|
+ 'https://www.jamendo.com' + path, resource_id, query={
|
|
|
+ 'id[]': resource_id,
|
|
|
+ }, headers={
|
|
|
+ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
|
|
+ })[0]
|
|
|
+
|
|
|
def _real_extract(self, url):
|
|
|
- mobj = self._VALID_URL_RE.match(url)
|
|
|
- album_id = mobj.group('id')
|
|
|
-
|
|
|
- webpage = self._download_webpage(url, mobj.group('display_id'))
|
|
|
-
|
|
|
- title, artist, album = self._extract_meta(webpage, fatal=False)
|
|
|
-
|
|
|
- entries = [{
|
|
|
- '_type': 'url_transparent',
|
|
|
- 'url': compat_urlparse.urljoin(url, m.group('path')),
|
|
|
- 'ie_key': JamendoIE.ie_key(),
|
|
|
- 'id': self._search_regex(
|
|
|
- r'/track/(\d+)', m.group('path'), 'track id', default=None),
|
|
|
- 'artist': artist,
|
|
|
- 'album': album,
|
|
|
- } for m in re.finditer(
|
|
|
- r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
|
|
|
- webpage)]
|
|
|
-
|
|
|
- return self.playlist_result(entries, album_id, title)
|
|
|
+ album_id = self._match_id(url)
|
|
|
+ album = self._call_api('album', album_id)
|
|
|
+ album_name = album.get('name')
|
|
|
+
|
|
|
+ entries = []
|
|
|
+ for track in album.get('tracks', []):
|
|
|
+ track_id = track.get('id')
|
|
|
+ if not track_id:
|
|
|
+ continue
|
|
|
+ track_id = compat_str(track_id)
|
|
|
+ entries.append({
|
|
|
+ '_type': 'url_transparent',
|
|
|
+ 'url': 'https://www.jamendo.com/track/' + track_id,
|
|
|
+ 'ie_key': JamendoIE.ie_key(),
|
|
|
+ 'id': track_id,
|
|
|
+ 'album': album_name,
|
|
|
+ })
|
|
|
+
|
|
|
+ return self.playlist_result(
|
|
|
+ entries, album_id, album_name,
|
|
|
+ clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))
|