浏览代码

[chirbit:profile] Fix extraction

Yen Chi Hsuan 9 年之前
父节点
当前提交
2ecbd2ad6f
共有 2 个文件被更改,包括 6 次插入和 8 次删除
  1. 1 0
      ChangeLog
  2. 5 8
      youtube_dl/extractor/chirbit.py

+ 1 - 0
ChangeLog

@@ -1,6 +1,7 @@
 version <unreleased>
 version <unreleased>
 
 
 Extractors
 Extractors
+* [chirbit] Fix extraction of user profile pages
 * [charambatv] Fix extraction
 * [charambatv] Fix extraction
 * [canalplus] Fix extraction for some videos
 * [canalplus] Fix extraction for some videos
 * [cbsinteractive] Fix extraction for cnet.com
 * [cbsinteractive] Fix extraction for cnet.com

+ 5 - 8
youtube_dl/extractor/chirbit.py

@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
 import base64
 import base64
+import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import parse_duration
 from ..utils import parse_duration
@@ -70,7 +71,6 @@ class ChirbitProfileIE(InfoExtractor):
         'url': 'http://chirbit.com/ScarletBeauty',
         'url': 'http://chirbit.com/ScarletBeauty',
         'info_dict': {
         'info_dict': {
             'id': 'ScarletBeauty',
             'id': 'ScarletBeauty',
-            'title': 'Chirbits by ScarletBeauty',
         },
         },
         'playlist_mincount': 3,
         'playlist_mincount': 3,
     }
     }
@@ -78,13 +78,10 @@ class ChirbitProfileIE(InfoExtractor):
     def _real_extract(self, url):
     def _real_extract(self, url):
         profile_id = self._match_id(url)
         profile_id = self._match_id(url)
 
 
-        rss = self._download_xml(
-            'http://chirbit.com/rss/%s' % profile_id, profile_id)
+        webpage = self._download_webpage(url, profile_id)
 
 
         entries = [
         entries = [
-            self.url_result(audio_url.text, 'Chirbit')
-            for audio_url in rss.findall('./channel/item/link')]
+            self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
+            for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
 
 
-        title = rss.find('./channel/title').text
-
-        return self.playlist_result(entries, profile_id, title)
+        return self.playlist_result(entries, profile_id)