فهرست منبع

[livestream:original] Add support for folder urls (closes #2631)

The webpage only contains shortened links for the videos. Since the server
doesn't support HEAD requests, we use a specific extractor for them.
Jaime Marquínez Ferrándiz 11 سال پیش
والد
کامیت
78338f71ca
4 فایلهای تغییر یافته به همراه 64 افزوده شده و 6 حذف شده
  1. 9 0
      test/test_playlists.py
  2. 5 1
      youtube_dl/extractor/__init__.py
  3. 3 0
      youtube_dl/extractor/common.py
  4. 47 5
      youtube_dl/extractor/livestream.py

+ 9 - 0
test/test_playlists.py

@@ -30,6 +30,7 @@ from youtube_dl.extractor import (
     SoundcloudPlaylistIE,
     SoundcloudPlaylistIE,
     TeacherTubeClassroomIE,
     TeacherTubeClassroomIE,
     LivestreamIE,
     LivestreamIE,
+    LivestreamOriginalIE,
     NHLVideocenterIE,
     NHLVideocenterIE,
     BambuserChannelIE,
     BambuserChannelIE,
     BandcampAlbumIE,
     BandcampAlbumIE,
@@ -155,6 +156,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'TEDCity2.0 (English)')
         self.assertEqual(result['title'], 'TEDCity2.0 (English)')
         self.assertTrue(len(result['entries']) >= 4)
         self.assertTrue(len(result['entries']) >= 4)
 
 
+    def test_livestreamoriginal_folder(self):
+        dl = FakeYDL()
+        ie = LivestreamOriginalIE(dl)
+        result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+        self.assertTrue(len(result['entries']) >= 28)
+
     def test_nhl_videocenter(self):
     def test_nhl_videocenter(self):
         dl = FakeYDL()
         dl = FakeYDL()
         ie = NHLVideocenterIE(dl)
         ie = NHLVideocenterIE(dl)

+ 5 - 1
youtube_dl/extractor/__init__.py

@@ -147,7 +147,11 @@ from .ku6 import Ku6IE
 from .la7 import LA7IE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
 from .liveleak import LiveLeakIE
-from .livestream import LivestreamIE, LivestreamOriginalIE
+from .livestream import (
+    LivestreamIE,
+    LivestreamOriginalIE,
+    LivestreamShortenerIE,
+)
 from .lynda import (
 from .lynda import (
     LyndaIE,
     LyndaIE,
     LyndaCourseIE
     LyndaCourseIE

+ 3 - 0
youtube_dl/extractor/common.py

@@ -459,6 +459,9 @@ class InfoExtractor(object):
         if secure: regexes = self._og_regexes('video:secure_url') + regexes
         if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
         return self._html_search_regex(regexes, html, name, **kargs)
 
 
+    def _og_search_url(self, html, **kargs):
+        return self._og_search_property('url', html, **kargs)
+
     def _html_search_meta(self, name, html, display_name=None, fatal=False):
     def _html_search_meta(self, name, html, display_name=None, fatal=False):
         if display_name is None:
         if display_name is None:
             display_name = name
             display_name = name

+ 47 - 5
youtube_dl/extractor/livestream.py

@@ -9,6 +9,7 @@ from ..utils import (
     compat_urlparse,
     compat_urlparse,
     xpath_with_ns,
     xpath_with_ns,
     compat_str,
     compat_str,
+    orderedSet,
 )
 )
 
 
 
 
@@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor):
 # The original version of Livestream uses a different system
 # The original version of Livestream uses a different system
 class LivestreamOriginalIE(InfoExtractor):
 class LivestreamOriginalIE(InfoExtractor):
     IE_NAME = 'livestream:original'
     IE_NAME = 'livestream:original'
-    _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
+    _VALID_URL = r'''(?x)https?://www\.livestream\.com/
+        (?P<user>[^/]+)/(?P<type>video|folder)
+        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
+        '''
     _TEST = {
     _TEST = {
         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
         'info_dict': {
         'info_dict': {
@@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor):
         },
         },
     }
     }
 
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        user = mobj.group('user')
+    def _extract_video(self, user, video_id):
         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
 
 
         info = self._download_xml(api_url, video_id)
         info = self._download_xml(api_url, video_id)
@@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor):
             'ext': 'flv',
             'ext': 'flv',
             'thumbnail': thumbnail_url,
             'thumbnail': thumbnail_url,
         }
         }
+
+    def _extract_folder(self, url, folder_id):
+        webpage = self._download_webpage(url, folder_id)
+        urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+
+        return {
+            '_type': 'playlist',
+            'id': folder_id,
+            'entries': [{
+                '_type': 'url',
+                'url': video_url,
+            } for video_url in urls],
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id = mobj.group('id')
+        user = mobj.group('user')
+        url_type = mobj.group('type')
+        if url_type == 'folder':
+            return self._extract_folder(url, id)
+        else:
+            return self._extract_video(user, id)
+
+
+# The server doesn't support HEAD requests, so the generic extractor can't
+# detect the redirection.
+class LivestreamShortenerIE(InfoExtractor):
+    IE_NAME = 'livestream:shortener'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id = mobj.group('id')
+        webpage = self._download_webpage(url, id)
+
+        return {
+            '_type': 'url',
+            'url': self._og_search_url(webpage),
+        }