Prechádzať zdrojové kódy

[youtube] Fix some issues with the detection of playlist/channel urls (reported in #1374)

They were being caught by YoutubeUserIE, now it only extracts a url if the rest of extractors aren't suitable.
Now the url tests check that the urls can only be extracted with an specific extractor.
Jaime Marquínez Ferrándiz 12 rokov pred
rodič
commit
e3ea479087
2 zmenil súbory, kde vykonal 22 pridanie a 13 odobranie
  1. 16 11
      test/test_all_urls.py
  2. 6 2
      youtube_dl/extractor/youtube.py

+ 16 - 11
test/test_all_urls.py

@@ -21,14 +21,15 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertEqual(self.matching_ies(url), ie_list)
         self.assertEqual(self.matching_ies(url), ie_list)
 
 
     def test_youtube_playlist_matching(self):
     def test_youtube_playlist_matching(self):
-        self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
-        self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
-        self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
+        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+        assertPlaylist(u'PL63F0C78739B09958')
+        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
 
 
     def test_youtube_matching(self):
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
@@ -37,9 +38,10 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
 
 
     def test_youtube_channel_matching(self):
     def test_youtube_channel_matching(self):
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
+        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
 
 
     def test_youtube_user_matching(self):
     def test_youtube_user_matching(self):
         self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
         self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
@@ -50,6 +52,9 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
         self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
         self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
         self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
 
 
+    def test_youtube_show_matching(self):
+        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
+
     def test_justin_tv_channelid_matching(self):
     def test_justin_tv_channelid_matching(self):
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))

+ 6 - 2
youtube_dl/extractor/youtube.py

@@ -386,7 +386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     @classmethod
     @classmethod
     def suitable(cls, url):
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
         """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
+        if YoutubePlaylistIE.suitable(url): return False
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
 
     def report_video_webpage_download(self, video_id):
     def report_video_webpage_download(self, video_id):
@@ -1021,8 +1021,12 @@ class YoutubeUserIE(InfoExtractor):
     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     IE_NAME = u'youtube:user'
     IE_NAME = u'youtube:user'
 
 
+    @classmethod
     def suitable(cls, url):
     def suitable(cls, url):
-        if YoutubeIE.suitable(url) or YoutubeFavouritesIE.suitable(url): return False
+        # Don't return True if the url can be extracted with other youtube
+        # extractor, the regex would is too permissive and it would match.
+        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
+        if any(ie.suitable(url) for ie in other_ies): return False
         else: return super(YoutubeUserIE, cls).suitable(url)
         else: return super(YoutubeUserIE, cls).suitable(url)
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):