浏览代码

[youtube] Move more tests to extractors

Philipp Hagemeister 11 年之前
父节点
当前提交
cdc628a498
共有 2 个文件被更改,包括 62 次插入48 次删除
  1. 0 39
      test/test_youtube_lists.py
  2. 62 9
      youtube_dl/extractor/youtube.py

+ 0 - 39
test/test_youtube_lists.py

@@ -10,7 +10,6 @@ from test.helper import FakeYDL
 
 
 
 
 from youtube_dl.extractor import (
 from youtube_dl.extractor import (
-    YoutubeUserIE,
     YoutubePlaylistIE,
     YoutubePlaylistIE,
     YoutubeIE,
     YoutubeIE,
     YoutubeChannelIE,
     YoutubeChannelIE,
@@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):
         self.assertEqual(len(entries), 25)
         self.assertEqual(len(entries), 25)
         self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
         self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
 
 
-    def test_youtube_channel(self):
-        dl = FakeYDL()
-        ie = YoutubeChannelIE(dl)
-        #test paginated channel
-        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
-        self.assertTrue(len(result['entries']) > 90)
-        #test autogenerated channel
-        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
-        self.assertTrue(len(result['entries']) >= 18)
-
-    def test_youtube_user(self):
-        dl = FakeYDL()
-        ie = YoutubeUserIE(dl)
-        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
-        self.assertTrue(len(result['entries']) >= 320)
-
-    def test_youtube_show(self):
-        dl = FakeYDL()
-        ie = YoutubeShowIE(dl)
-        result = ie.extract('http://www.youtube.com/show/airdisasters')
-        self.assertTrue(len(result) >= 3)
-
     def test_youtube_mix(self):
     def test_youtube_mix(self):
         dl = FakeYDL()
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
         ie = YoutubePlaylistIE(dl)
@@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):
         entries = result['entries']
         entries = result['entries']
         self.assertEqual(len(entries), 100)
         self.assertEqual(len(entries), 100)
 
 
-    def test_youtube_toplist(self):
-        dl = FakeYDL()
-        ie = YoutubeTopListIE(dl)
-        result = ie.extract('yttoplist:music:Trending')
-        entries = result['entries']
-        self.assertTrue(len(entries) >= 5)
-
-    def test_youtube_search_url(self):
-        dl = FakeYDL()
-        ie = YoutubeSearchURLIE(dl)
-        result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
-        entries = result['entries']
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'youtube-dl test video')
-        self.assertTrue(len(entries) >= 5)
-
 if __name__ == '__main__':
 if __name__ == '__main__':
     unittest.main()
     unittest.main()

+ 62 - 9
youtube_dl/extractor/youtube.py

@@ -1160,16 +1160,25 @@ class YoutubeTopListIE(YoutubePlaylistIE):
     IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
     IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
         ' (Example: "yttoplist:music:Top Tracks")')
         ' (Example: "yttoplist:music:Top Tracks")')
     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
-    _TESTS = []
+    _TESTS = [{
+        'url': 'yttoplist:music:Trending',
+        'playlist_mincount': 5,
+        'skip': 'Only works for logged-in users',
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
         channel = mobj.group('chann')
         channel = mobj.group('chann')
         title = mobj.group('title')
         title = mobj.group('title')
         query = compat_urllib_parse.urlencode({'title': title})
         query = compat_urllib_parse.urlencode({'title': title})
-        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
-        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
-        link = self._html_search_regex(playlist_re, channel_page, 'list')
+        channel_page = self._download_webpage(
+            'https://www.youtube.com/%s' % channel, title)
+        link = self._html_search_regex(
+            r'''(?x)
+                <a\s+href="([^"]+)".*?>\s*
+                <span\s+class="branded-page-module-title-text">\s*
+                <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
+            channel_page, 'list')
         url = compat_urlparse.urljoin('https://www.youtube.com/', link)
         url = compat_urlparse.urljoin('https://www.youtube.com/', link)
         
         
         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
@@ -1195,6 +1204,11 @@ class YoutubeChannelIE(InfoExtractor):
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     IE_NAME = 'youtube:channel'
     IE_NAME = 'youtube:channel'
+    _TESTS = [{
+        'note': 'paginated channel',
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+        'playlist_mincount': 91,
+    }]
 
 
     def extract_videos_from_page(self, page):
     def extract_videos_from_page(self, page):
         ids_in_page = []
         ids_in_page = []
@@ -1253,6 +1267,17 @@ class YoutubeUserIE(InfoExtractor):
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     IE_NAME = 'youtube:user'
     IE_NAME = 'youtube:user'
 
 
+    _TESTS = [{
+        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
+        'playlist_mincount': 320,
+        'info_dict': {
+            'title': 'TheLinuxFoundation',
+        }
+    }, {
+        'url': 'ytuser:phihag',
+        'only_matching': True,
+    }]
+
     @classmethod
     @classmethod
     def suitable(cls, url):
     def suitable(cls, url):
         # Don't return True if the url can be extracted with other youtube
         # Don't return True if the url can be extracted with other youtube
@@ -1361,6 +1386,13 @@ class YoutubeSearchURLIE(InfoExtractor):
     IE_DESC = 'YouTube.com search URLs'
     IE_DESC = 'YouTube.com search URLs'
     IE_NAME = 'youtube:search_url'
     IE_NAME = 'youtube:search_url'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
+    _TESTS = [{
+        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'title': 'youtube-dl test video',
+        }
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
@@ -1395,17 +1427,38 @@ class YoutubeSearchURLIE(InfoExtractor):
 
 
 class YoutubeShowIE(InfoExtractor):
 class YoutubeShowIE(InfoExtractor):
     IE_DESC = 'YouTube.com (multi-season) shows'
     IE_DESC = 'YouTube.com (multi-season) shows'
-    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
+    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
     IE_NAME = 'youtube:show'
     IE_NAME = 'youtube:show'
+    _TESTS = [{
+        'url': 'http://www.youtube.com/show/airdisasters',
+        'playlist_mincount': 3,
+        'info_dict': {
+            'id': 'airdisasters',
+            'title': 'Air Disasters',
+        }
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
-        show_name = mobj.group(1)
-        webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
+        playlist_id = mobj.group('id')
+        webpage = self._download_webpage(
+            url, playlist_id, 'Downloading show webpage')
         # There's one playlist for each season of the show
         # There's one playlist for each season of the show
         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
-        self.to_screen('%s: Found %s seasons' % (show_name, len(m_seasons)))
-        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
+        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
+        entries = [
+            self.url_result(
+                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
+            for season in m_seasons
+        ]
+        title = self._og_search_title(webpage, fatal=False)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': title,
+            'entries': entries,
+        }
 
 
 
 
 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):