Browse Source

Merge remote-tracking branch 'xavierbeynon/master'

Philipp Hagemeister 10 years ago
parent
commit
51897bb77c
2 changed files with 81 additions and 25 deletions
  1. 1 1
      youtube_dl/extractor/__init__.py
  2. 80 24
      youtube_dl/extractor/audiomack.py

+ 1 - 1
youtube_dl/extractor/__init__.py

@@ -26,7 +26,7 @@ from .arte import (
     ArteTVEmbedIE,
     ArteTVEmbedIE,
 )
 )
 from .atresplayer import AtresPlayerIE
 from .atresplayer import AtresPlayerIE
-from .audiomack import AudiomackIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
 from .auengine import AUEngineIE
 from .auengine import AUEngineIE
 from .azubu import AzubuIE
 from .azubu import AzubuIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bambuser import BambuserIE, BambuserChannelIE

+ 80 - 24
youtube_dl/extractor/audiomack.py

@@ -17,12 +17,13 @@ class AudiomackIE(InfoExtractor):
             'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
             'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
             'info_dict':
             'info_dict':
             {
             {
-                'id': 'roosh-williams/extraordinary',
+                'id': '310086',
                 'ext': 'mp3',
                 'ext': 'mp3',
-                'title': 'Roosh Williams - Extraordinary'
+                'artist': 'Roosh Williams',
+                'title': 'Extraordinary'
             }
             }
         },
         },
-        # hosted on soundcloud via audiomack
+        # audiomack wrapper around soundcloud song
         {
         {
             'add_ie': ['Soundcloud'],
             'add_ie': ['Soundcloud'],
             'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
             'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
@@ -37,33 +38,88 @@ class AudiomackIE(InfoExtractor):
         },
         },
     ]
     ]
 
 
+    @staticmethod
+    def create_song_dictionary(api_response, album_url_tag, track_no=0):
+        """Build the info dict for one song from an audiomack API response.
+
+        Copies 'title', 'artist', 'id' and 'url' from api_response when they
+        are present.  A missing 'id' falls back to track_no and a missing
+        'title' falls back to album_url_tag.
+        """
+        # The API uses the same key names as the info dict, so copy verbatim
+        song = {}
+        for field in ('title', 'artist', 'id', 'url'):
+            if field in api_response:
+                song[field] = api_response[field]
+        # Substitute placeholders for metadata the API did not supply
+        song.setdefault('id', track_no)
+        song.setdefault('title', album_url_tag)
+        return song
+
     def _real_extract(self, url):
     def _real_extract(self, url):
+        """Extract a single audiomack song.
+
+        Queries the extended song API for the id matched from url.  When the
+        API's 'url' is a Soundcloud URL, returns a 'url' result delegating to
+        the Soundcloud extractor; otherwise returns the dict built by
+        create_song_dictionary.
+
+        Raises ExtractorError when the response has no usable 'url' or
+        contains an 'error' key.
+        """
-        video_id = self._match_id(url)
+        # URLs end with [uploader name]/[uploader title]
+        # this title is whatever the user types in, and is rarely
+        # the proper song title.  Real metadata is in the api response
+        album_url_tag = self._match_id(url)
 
 
+        # Request the extended version of the api for extra fields like artist and title
         api_response = self._download_json(
         api_response = self._download_json(
-            "http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
-                video_id, time.time()),
-            video_id)
+            'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
+                album_url_tag, time.time()),
+            album_url_tag)
 
 
-        if "url" not in api_response:
-            raise ExtractorError("Unable to deduce api url of song")
-        realurl = api_response["url"]
+        # API is inconsistent with errors: treat a missing url, an empty url
+        # and an explicit 'error' key all as failure
+        if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
+            # Interpolate with %; passing url as a second positional argument
+            # would leave a literal '%s' in the message
+            raise ExtractorError('Invalid url %s' % url)
 
 
         # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
         # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
-        # - if so, pass the work off to the soundcloud extractor
-        if SoundcloudIE.suitable(realurl):
-            return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
+        # if so, pass the work off to the soundcloud extractor
+        if SoundcloudIE.suitable(api_response['url']):
+            return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
+
+        return self.create_song_dictionary(api_response, album_url_tag)
 
 
-        webpage = self._download_webpage(url, video_id)
-        artist = self._html_search_regex(
-            r'<span class="artist">(.*?)</span>', webpage, "artist")
-        songtitle = self._html_search_regex(
-            r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
-            webpage, "title")
-        title = artist + " - " + songtitle
 
 
-        return {
-            'id': video_id,
-            'title': title,
-            'url': realurl,
+class AudiomackAlbumIE(InfoExtractor):
+    """Extractor for audiomack album URLs; returns the album as a playlist."""
+    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
+    IE_NAME = 'audiomack:album'
+    _TESTS = [
+        # Standard album playlist
+        {
+            'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
+            'playlist_count': 15,
+            'info_dict':
+            {
+                'id': '812251',
+                'title': 'Tha Tour: Part 2 (Official Mixtape)'
+            }
+        },
+        # Album playlist ripped from fakeshoredrive with no metadata
+        {
+            'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+            'playlist_count': 10
         }
         }
+    ]
+
+    def _real_extract(self, url):
+        """Extract an album as a playlist of songs.
+
+        Requests one track at a time from the album API, starting at track
+        index 0, and stops at the first response whose 'url' is empty.  The
+        playlist 'id' and 'title' are taken from the first response that
+        carries 'album_id' / 'album_title'.
+
+        Raises ExtractorError when a response lacks 'url' or contains 'error'.
+        """
+        # URLs end with [uploader name]/[uploader title]
+        # this title is whatever the uploader typed in, so the album
+        # metadata is taken from the api response instead
+        album_url_tag = self._match_id(url)
+        result = {'_type': 'playlist', 'entries': []}
+        # There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
+        # Therefore we don't know how many songs the album has and must keep requesting tracks until one fails
+        # NOTE(review): the loop has no upper bound; it ends only on an empty 'url' or a raised error
+        track_no = 0
+        while True:
+            # Get song's metadata
+            api_response = self._download_json('http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
+                                               % (album_url_tag, track_no, time.time()), album_url_tag)
+
+            # Total failure, only occurs when url is totally wrong
+            # Won't happen in middle of valid playlist (next case)
+            if 'url' not in api_response or 'error' in api_response:
+                raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
+            # URL is good but song id doesn't exist - usually means end of playlist
+            elif not api_response['url']:
+                break
+            else:
+                # Pull out the album metadata and add to result (if it exists)
+                # Only the first occurrence is kept; later tracks do not overwrite it
+                for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
+                    if apikey in api_response and resultkey not in result:
+                        result[resultkey] = api_response[apikey]
+                result['entries'].append(AudiomackIE.create_song_dictionary(api_response, album_url_tag, track_no))
+            track_no += 1
+        return result