浏览代码

[jadorecettepub] Add extractor (Fixes #2148)

Philipp Hagemeister 11 年之前
父节点
当前提交
9766538124

+ 1 - 1
test/test_all_urls.py

@@ -85,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
 
 
     def test_youtube_extract(self):
     def test_youtube_extract(self):
-        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
+        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
         assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
         assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
         assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
         assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')

+ 5 - 5
test/test_youtube_lists.py

@@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertIsPlaylist(result)
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'ytdl test PL')
         self.assertEqual(result['title'], 'ytdl test PL')
-        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
+        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
         self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
         self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
 
 
     def test_youtube_playlist_noplaylist(self):
     def test_youtube_playlist_noplaylist(self):
@@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
         ie = YoutubePlaylistIE(dl)
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertEqual(result['_type'], 'url')
         self.assertEqual(result['_type'], 'url')
-        self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
+        self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
 
 
     def test_issue_673(self):
     def test_issue_673(self):
         dl = FakeYDL()
         dl = FakeYDL()
@@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
+        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
         self.assertFalse('pElCt5oNDuI' in ytie_results)
         self.assertFalse('pElCt5oNDuI' in ytie_results)
         self.assertFalse('KdPEApIVdWM' in ytie_results)
         self.assertFalse('KdPEApIVdWM' in ytie_results)
         
         
@@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
         # TODO find a > 100 (paginating?) videos course
         # TODO find a > 100 (paginating?) videos course
         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         entries = result['entries']
         entries = result['entries']
-        self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
+        self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
         self.assertEqual(len(entries), 25)
         self.assertEqual(len(entries), 25)
-        self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
+        self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
 
 
     def test_youtube_channel(self):
     def test_youtube_channel(self):
         dl = FakeYDL()
         dl = FakeYDL()

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -105,6 +105,7 @@ from .ivi import (
     IviIE,
     IviIE,
     IviCompilationIE
     IviCompilationIE
 )
 )
+from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .justintv import JustinTVIE

+ 49 - 0
youtube_dl/extractor/jadorecettepub.py

@@ -0,0 +1,49 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class JadoreCettePubIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
+
+    _TEST = {
+        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
+        'md5': '401286a06067c70b44076044b66515de',
+        'info_dict': {
+            'id': 'jLMja3tr7a4',
+            'ext': 'mp4',
+            'title': 'La pire utilisation de Star Wars',
+            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon.  Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
+            webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<div id="fb-root">(.*?)<script>', webpage, 'description',
+            fatal=False)
+        real_url = self._search_regex(
+            r'\[/postlink\](.*)endofvid', webpage, 'video URL')
+        video_id = YoutubeIE.extract_id(real_url)
+
+        return {
+            '_type': 'url_transparent',
+            'url': real_url,
+            'id': video_id,
+            'title': title,
+            'description': description,
+        }
+

+ 4 - 3
youtube_dl/extractor/youtube.py

@@ -1085,8 +1085,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             self._downloader.report_warning(err_msg)
             self._downloader.report_warning(err_msg)
             return {}
             return {}
 
 
-    def _extract_id(self, url):
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
+    @classmethod
+    def extract_id(cls, url):
+        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
         if mobj is None:
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group(2)
         video_id = mobj.group(2)
@@ -1115,7 +1116,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         mobj = re.search(self._NEXT_URL_RE, url)
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
         if mobj:
             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
-        video_id = self._extract_id(url)
+        video_id = self.extract_id(url)
 
 
         # Get video webpage
         # Get video webpage
         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id