Sfoglia il codice sorgente

[bandcamp] add support for albums (reported in #1270)

Jaime Marquínez Ferrándiz 11 anni fa
parent
commit
0980426559

+ 9 - 0
test/test_playlists.py

@@ -22,6 +22,7 @@ from youtube_dl.extractor import (
     LivestreamIE,
     LivestreamIE,
     NHLVideocenterIE,
     NHLVideocenterIE,
     BambuserChannelIE,
     BambuserChannelIE,
+    BandcampAlbumIE
 )
 )
 
 
 
 
@@ -103,5 +104,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'pixelversity')
         self.assertEqual(result['title'], u'pixelversity')
         self.assertTrue(len(result['entries']) >= 66)
         self.assertTrue(len(result['entries']) >= 66)
 
 
+    def test_bandcamp_album(self):
+        dl = FakeYDL()
+        ie = BandcampAlbumIE(dl)
+        result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'Nightmare Night EP')
+        self.assertTrue(len(result['entries']) >= 4)
+
 if __name__ == '__main__':
 if __name__ == '__main__':
     unittest.main()
     unittest.main()

+ 1 - 1
youtube_dl/extractor/__init__.py

@@ -11,7 +11,7 @@ from .arte import (
 )
 )
 from .auengine import AUEngineIE
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bambuser import BambuserIE, BambuserChannelIE
-from .bandcamp import BandcampIE
+from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .bloomberg import BloombergIE
 from .breakcom import BreakIE
 from .breakcom import BreakIE

+ 24 - 0
youtube_dl/extractor/bandcamp.py

@@ -3,11 +3,13 @@ import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
+    compat_urlparse,
     ExtractorError,
     ExtractorError,
 )
 )
 
 
 
 
 class BandcampIE(InfoExtractor):
 class BandcampIE(InfoExtractor):
+    IE_NAME = u'Bandcamp'
     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
     _TEST = {
     _TEST = {
         u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
@@ -61,3 +63,25 @@ class BandcampIE(InfoExtractor):
                       }
                       }
 
 
         return [track_info]
         return [track_info]
+
+
+class BandcampAlbumIE(InfoExtractor):
+    IE_NAME = u'Bandcamp:album'
+    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
+        if not tracks_paths:
+            raise ExtractorError(u'The page doesn\'t contain any track')
+        entries = [
+            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
+            for t_path in tracks_paths]
+        title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': entries,
+        }

+ 2 - 1
youtube_dl/extractor/generic.py

@@ -199,7 +199,8 @@ class GenericIE(InfoExtractor):
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         if mobj is not None:
         if mobj is not None:
             burl = unescapeHTML(mobj.group(1))
             burl = unescapeHTML(mobj.group(1))
-            return self.url_result(burl, 'Bandcamp')
+            # Don't pin the extractor: the URL may point to either a track or an album
+            return self.url_result(burl)
 
 
         # Start with something easy: JW Player in SWFObject
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)