Browse Source

[livestream:original] Add support for folder urls (closes #2631)

The webpage only contains shortened links for the videos. Since the server
doesn't support HEAD requests, we use a specific extractor for them.
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
78338f71ca

+ 9 - 0
test/test_playlists.py

@@ -30,6 +30,7 @@ from youtube_dl.extractor import (
     SoundcloudPlaylistIE,
     TeacherTubeClassroomIE,
     LivestreamIE,
+    LivestreamOriginalIE,
     NHLVideocenterIE,
     BambuserChannelIE,
     BandcampAlbumIE,
@@ -155,6 +156,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'TEDCity2.0 (English)')
         self.assertTrue(len(result['entries']) >= 4)
 
+    def test_livestreamoriginal_folder(self):
+        dl = FakeYDL()
+        ie = LivestreamOriginalIE(dl)
+        result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
+        self.assertTrue(len(result['entries']) >= 28)
+
     def test_nhl_videocenter(self):
         dl = FakeYDL()
         ie = NHLVideocenterIE(dl)

+ 5 - 1
youtube_dl/extractor/__init__.py

@@ -147,7 +147,11 @@ from .ku6 import Ku6IE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
-from .livestream import LivestreamIE, LivestreamOriginalIE
+from .livestream import (
+    LivestreamIE,
+    LivestreamOriginalIE,
+    LivestreamShortenerIE,
+)
 from .lynda import (
     LyndaIE,
     LyndaCourseIE

+ 3 - 0
youtube_dl/extractor/common.py

@@ -459,6 +459,9 @@ class InfoExtractor(object):
         if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
+    def _og_search_url(self, html, **kargs):
+        return self._og_search_property('url', html, **kargs)
+
     def _html_search_meta(self, name, html, display_name=None, fatal=False):
         if display_name is None:
             display_name = name

+ 47 - 5
youtube_dl/extractor/livestream.py

@@ -9,6 +9,7 @@ from ..utils import (
     compat_urlparse,
     xpath_with_ns,
     compat_str,
+    orderedSet,
 )
 
 
@@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor):
 # The original version of Livestream uses a different system
 class LivestreamOriginalIE(InfoExtractor):
     IE_NAME = 'livestream:original'
-    _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
+    _VALID_URL = r'''(?x)https?://www\.livestream\.com/
+        (?P<user>[^/]+)/(?P<type>video|folder)
+        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
+        '''
     _TEST = {
         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
         'info_dict': {
@@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor):
         },
     }
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        user = mobj.group('user')
+    def _extract_video(self, user, video_id):
         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
 
         info = self._download_xml(api_url, video_id)
@@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor):
             'ext': 'flv',
             'thumbnail': thumbnail_url,
         }
+
+    def _extract_folder(self, url, folder_id):
+        webpage = self._download_webpage(url, folder_id)
+        urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+
+        return {
+            '_type': 'playlist',
+            'id': folder_id,
+            'entries': [{
+                '_type': 'url',
+                'url': video_url,
+            } for video_url in urls],
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id = mobj.group('id')
+        user = mobj.group('user')
+        url_type = mobj.group('type')
+        if url_type == 'folder':
+            return self._extract_folder(url, id)
+        else:
+            return self._extract_video(user, id)
+
+
+# The server doesn't support HEAD requests, so the generic extractor can't
+# detect the redirection
+class LivestreamShortenerIE(InfoExtractor):
+    IE_NAME = 'livestream:shortener'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id = mobj.group('id')
+        webpage = self._download_webpage(url, id)
+
+        return {
+            '_type': 'url',
+            'url': self._og_search_url(webpage),
+        }