
[bambuser] Add an extractor for channels (closes #1702)

Jaime Marquínez Ferrándiz
commit 165e3bb67a
3 changed files with 49 additions and 2 deletions
  1. test/test_playlists.py (+9, -0)
  2. youtube_dl/extractor/__init__.py (+1, -1)
  3. youtube_dl/extractor/bambuser.py (+39, -1)
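
For orientation (not part of the commit): once this change lands, the new channel extractor can be exercised through youtube_dl's public API. A minimal sketch, assuming Python 3 and the standard YoutubeDL entry point, using the channel URL from the new test; the 'quiet' option is only there to keep output small.

    # Hypothetical usage of the new BambuserChannelIE; illustrative only.
    import youtube_dl

    ydl = youtube_dl.YoutubeDL({'quiet': True})
    # With download=False this just returns the playlist dict that
    # BambuserChannelIE._real_extract builds in the diff below.
    info = ydl.extract_info('http://bambuser.com/channel/pixelversity',
                            download=False)
    print('%s: %d entries' % (info['title'], len(info['entries'])))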

test/test_playlists.py (+9, -0)

@@ -20,6 +20,7 @@ from youtube_dl.extractor import (
     SoundcloudUserIE,
     LivestreamIE,
     NHLVideocenterIE,
+    BambuserChannelIE,
 )


@@ -85,5 +86,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Highlights')
         self.assertEqual(len(result['entries']), 12)

+    def test_bambuser_channel(self):
+        dl = FakeYDL()
+        ie = BambuserChannelIE(dl)
+        result = ie.extract('http://bambuser.com/channel/pixelversity')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'pixelversity')
+        self.assertTrue(len(result['entries']) >= 66)
+
 if __name__ == '__main__':
     unittest.main()

youtube_dl/extractor/__init__.py (+1, -1)

@@ -9,7 +9,7 @@ from .arte import (
     ArteTVFutureIE,
 )
 from .auengine import AUEngineIE
-from .bambuser import BambuserIE
+from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE

youtube_dl/extractor/bambuser.py (+39, -1)

@@ -1,10 +1,15 @@
 import re
 import json
+import itertools

 from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+)


 class BambuserIE(InfoExtractor):
+    IE_NAME = u'bambuser'
     _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
     _API_KEY = '005f64509e19a868399060af746a00aa'

@@ -33,10 +38,43 @@ class BambuserIE(InfoExtractor):
             'id': video_id,
             'title': info['title'],
             'url': info['url'],
-            'thumbnail': info['preview'],
+            'thumbnail': info.get('preview'),
             'duration': int(info['length']),
             'view_count': int(info['views_total']),
             'uploader': info['username'],
             'uploader_id': info['uid'],
         }

+
+class BambuserChannelIE(InfoExtractor):
+    IE_NAME = u'bambuser:channel'
+    _VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
+    # The maximum number we can get with each request
+    _STEP = 50
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user = mobj.group('user')
+        urls = []
+        last_id = ''
+        for i in itertools.count(1):
+            req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
+                '&sort=created&access_mode=0%2C1%2C2&limit={count}'
+                '&method=broadcast&format=json&vid_older_than={last}'
+                ).format(user=user, count=self._STEP, last=last_id)
+            req = compat_urllib_request.Request(req_url)
+            # Without setting this header, we wouldn't get any result
+            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
+            info_json = self._download_webpage(req, user,
+                u'Downloading page %d' % i)
+            results = json.loads(info_json)['result']
+            if len(results) == 0:
+                break
+            last_id = results[-1]['vid']
+            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
+
+        return {
+            '_type': 'playlist',
+            'title': user,
+            'entries': urls,
+        }
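
A side note on the pagination above: the same walk over the xhr-api endpoint can be reproduced outside the extractor with nothing but the standard library. A rough sketch, assuming Python 3's urllib; the endpoint, parameters and Referer requirement are the ones used by _real_extract, while the function name and the default page size are only illustrative.

    # Standalone sketch of the paginated xhr-api walk (illustrative only).
    import json
    import urllib.request

    def list_channel_broadcasts(user, step=50):
        broadcasts = []
        last_id = ''
        while True:
            req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
                       '&sort=created&access_mode=0%2C1%2C2&limit={count}'
                       '&method=broadcast&format=json&vid_older_than={last}'
                       ).format(user=user, count=step, last=last_id)
            req = urllib.request.Request(req_url)
            # The API returns an empty result set unless the Referer points
            # at the channel page.
            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
            with urllib.request.urlopen(req) as resp:
                results = json.loads(resp.read().decode('utf-8'))['result']
            if not results:
                break
            # The next request asks for broadcasts older than the last one
            # seen, which is how the API pages through a channel.
            last_id = results[-1]['vid']
            broadcasts.extend(results)
        return broadcasts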