преди 10 години · 543ec2136b
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -63,6 +63,7 @@ from .ccc import CCCIE
 
				 from .ceskatelevize import CeskaTelevizeIE
			
 
				 from .channel9 import Channel9IE
			
 
				 from .chilloutzone import ChilloutzoneIE
			
 
				+from .chirbit import ChirbitIE, ChirbitProfileIE
			
 
				 from .cinchcast import CinchcastIE
			
 
				 from .clipfish import ClipfishIE
			
 
				 from .cliphunter import CliphunterIE
			
@@ -425,7 +426,10 @@ from .soundcloud import (
 
				     SoundcloudUserIE,
			
 
				     SoundcloudPlaylistIE
			
 
				 )
			
 
				-from .soundgasm import SoundgasmIE
			
 
				+from .soundgasm import (
			
 
				+    SoundgasmIE,
			
 
				+    SoundgasmProfileIE
			
 
				+)
			
 
				 from .southpark import (
			
 
				     SouthParkIE,
			
 
				     SouthparkDeIE,
			
--- a/youtube_dl/extractor/chirbit.py
+++ b/youtube_dl/extractor/chirbit.py
@@ -0,0 +1,97 @@
 
				+# coding: utf-8
			
 
				+from __future__ import unicode_literals
			
 
				+
			
 
				+import re
			
 
				+
			
 
				+from .common import InfoExtractor
			
 
				+from ..utils import clean_html
			
 
				+
			
 
				+
			
 
				+class ChirbitIE(InfoExtractor):
			
 
				+    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?P<id>[^/]+)'
			
 
				+    _TEST = {
			
 
				+        'url': 'http://chirb.it/PrIPv5',
			
 
				+        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
			
 
				+        'info_dict': {
			
 
				+            'id': 'PrIPv5',
			
 
				+            'display_id': 'kukushtv_1423231243',
			
 
				+            'ext': 'mp3',
			
 
				+            'title': 'Фасадстрой',
			
 
				+            'url': 'http://audio.chirbit.com/kukushtv_1423231243.mp3'
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    def _real_extract(self, url):
			
 
				+        audio_linkid = self._match_id(url)
			
 
				+        webpage = self._download_webpage(url, audio_linkid)
			
 
				+
			
 
				+        audio_title = self._html_search_regex(r'<h2\s+itemprop="name">(.*?)</h2>', webpage, 'title')
			
 
				+        audio_id = self._html_search_regex(r'\("setFile",\s+"http://audio.chirbit.com/(.*?).mp3"\)', webpage, 'audio ID')
			
 
				+        audio_url = 'http://audio.chirbit.com/' + audio_id + '.mp3';
			
 
				+
			
 
				+        return {
			
 
				+            'id': audio_linkid,
			
 
				+            'display_id': audio_id,
			
 
				+            'title': audio_title,
			
 
				+            'url': audio_url
			
 
				+        }
			
 
				+
			
 
				+class ChirbitProfileIE(InfoExtractor):
			
 
				+    _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?P<id>[^/]+)/?$'
			
 
				+    _TEST = {
			
 
				+        'url': 'http://chirbit.com/ScarletBeauty',
			
 
				+        'playlist_count': 3,
			
 
				+        'info_dict': {
			
 
				+            '_type': 'playlist',
			
 
				+            'title': 'ScarletBeauty',
			
 
				+            'id': 'ScarletBeauty'
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    def _real_extract(self, url):
			
 
				+        profile_id = self._match_id(url)
			
 
				+
			
 
				+        # Chirbit has a pretty weird "Last Page" navigation behavior.
			
 
				+        # We grab the profile's oldest entry to determine when to
			
 
				+        # stop fetching entries.
			
 
				+        oldestpage = self._download_webpage(url + '/24599', profile_id)
			
 
				+        oldest_page_entries = re.findall(
			
 
				+            r'''soundFile:\s*"http://audio.chirbit.com/(.*?).mp3"''',
			
 
				+            oldestpage);
			
 
				+        oldestentry = clean_html(oldest_page_entries[-1]);
			
 
				+
			
 
				+        ids = []
			
 
				+        titles = []
			
 
				+        n = 0
			
 
				+        while True:
			
 
				+            page = self._download_webpage(url + '/' + str(n), profile_id)
			
 
				+            page_ids = re.findall(
			
 
				+                r'''soundFile:\s*"http://audio.chirbit.com/(.*?).mp3"''',
			
 
				+                page);
			
 
				+            page_titles = re.findall(
			
 
				+                r'''<div\s+class="chirbit_title"\s*>(.*?)</div>''',
			
 
				+                page);
			
 
				+            ids += page_ids
			
 
				+            titles += page_titles
			
 
				+            if oldestentry in page_ids:
			
 
				+                break
			
 
				+            n += 1
			
 
				+
			
 
				+        entries = []
			
 
				+        i = 0
			
 
				+        for id in ids:
			
 
				+            entries.append({
			
 
				+                'id': id,
			
 
				+                'title': titles[i],
			
 
				+                'url': 'http://audio.chirbit.com/' + id + '.mp3'
			
 
				+            });
			
 
				+            i += 1
			
 
				+
			
 
				+        info_dict = {
			
 
				+            '_type': 'playlist',
			
 
				+            'id': profile_id,
			
 
				+            'title': profile_id,
			
 
				+            'entries': entries
			
 
				+        }
			
 
				+
			
 
				+        return info_dict;
			
--- a/youtube_dl/extractor/soundgasm.py
+++ b/youtube_dl/extractor/soundgasm.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 
				 import re
			
 
				 
			
 
				 from .common import InfoExtractor
			
 
				+from ..utils import clean_html
			
 
				 
			
 
				 
			
 
				 class SoundgasmIE(InfoExtractor):
			
@@ -38,3 +39,38 @@ class SoundgasmIE(InfoExtractor):
 
				             'title': audio_title,
			
 
				             'description': description
			
 
				         }
			
 
				+
			
 
				+class SoundgasmProfileIE(InfoExtractor):
			
 
				+    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[0-9a-zA-Z_\-]+)/?$'
			
 
				+    _TEST = {
			
 
				+        'url': 'http://soundgasm.net/u/ytdl',
			
 
				+        'playlist_count': 1,
			
 
				+        'info_dict': {
			
 
				+            '_type': 'playlist',
			
 
				+            'id': 'ytdl',
			
 
				+            'title': 'ytdl'
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    def _real_extract(self, url):
			
 
				+        profile_id = self._match_id(url)
			
 
				+        webpage = self._download_webpage(url, profile_id)
			
 
				+
			
 
				+        ids = re.findall(r'''<a\s+href=".+?/u/%s/([^/]+)">''' % re.escape(profile_id), webpage)
			
 
				+        ids = [clean_html(id) for id in ids]
			
 
				+
			
 
				+        entries = []
			
 
				+        for id in ids:
			
 
				+            entries.append({
			
 
				+                '_type': 'url',
			
 
				+                'url': ('http://soundgasm.net/u/%s/%s' % (profile_id, id))
			
 
				+            })
			
 
				+
			
 
				+        info_dict = {
			
 
				+            '_type': 'playlist',
			
 
				+            'id': profile_id,
			
 
				+            'title': profile_id,
			
 
				+            'entries': entries
			
 
				+        }
			
 
				+
			
 
				+        return info_dict;