Browse Source

[ministrygrid] Add extractor (Fixes #2900)

Philipp Hagemeister 11 years ago
parent
commit
4d54ef20a2

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -184,6 +184,7 @@ from .malemotion import MalemotionIE
 from .mdr import MDRIE
 from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .metacritic import MetacriticIE
+from .ministrygrid import MinistryGridIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
 from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
 from .mixcloud import MixcloudIE

+ 12 - 1
youtube_dl/extractor/generic.py

@@ -22,6 +22,7 @@ from ..utils import (
     smuggle_url,
     smuggle_url,
     unescapeHTML,
     unescapeHTML,
     unified_strdate,
     unified_strdate,
+    unsmuggle_url,
     url_basename,
     url_basename,
 )
 )
 from .brightcove import BrightcoveIE
 from .brightcove import BrightcoveIE
@@ -487,7 +488,14 @@ class GenericIE(InfoExtractor):
             else:
             else:
                 assert ':' in default_search
                 assert ':' in default_search
                 return self.url_result(default_search + url)
                 return self.url_result(default_search + url)
-        video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
+
+        url, smuggled_data = unsmuggle_url(url)
+        force_videoid = None
+        if smuggled_data and 'force_videoid' in smuggled_data:
+            force_videoid = smuggled_data['force_videoid']
+            video_id = force_videoid
+        else:
+            video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 
 
         self.to_screen('%s: Requesting header' % video_id)
         self.to_screen('%s: Requesting header' % video_id)
 
 
@@ -498,6 +506,9 @@ class GenericIE(InfoExtractor):
             new_url = response.geturl()
             new_url = response.geturl()
             if url != new_url:
             if url != new_url:
                 self.report_following_redirect(new_url)
                 self.report_following_redirect(new_url)
+                if force_videoid:
+                    new_url = smuggle_url(
+                        new_url, {'force_videoid': force_videoid})
                 return self.url_result(new_url)
                 return self.url_result(new_url)
 
 
             # Check for direct link to a video
             # Check for direct link to a video

+ 57 - 0
youtube_dl/extractor/ministrygrid.py

@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    smuggle_url,
+)
+
+
+class MinistryGridIE(InfoExtractor):
+    """Extractor for pages on www.ministrygrid.com.
+
+    The video iframe is located by rendering, one at a time, the portlets
+    named in the page's ``Liferay.Portlet.list`` and taking the first
+    ``<iframe>`` found.
+    """
+
+    # The last non-empty path component is captured as the video id.
+    _VALID_URL = r'https?://www\.ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
+
+    _TEST = {
+        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
+        'md5': '844be0d2a1340422759c2a9101bab017',
+        'info_dict': {
+            'id': '3453494717001',
+            'ext': 'mp4',
+            'title': 'The Gospel by Numbers',
+            'description': 'Coming soon from T4G 2014!',
+            'uploader': 'LifeWay Christian Resources (MG)',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a ``url`` result pointing at the page's video iframe.
+
+        The page id is smuggled into the iframe URL under the
+        ``force_videoid`` key.
+
+        Raises ExtractorError if no portlet yields an iframe.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        portlets_json = self._search_regex(
+            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
+        portlets = json.loads(portlets_json)
+        pl_id = self._search_regex(
+            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')
+
+        for i, portlet in enumerate(portlets):
+            portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
+            portlet_code = self._download_webpage(
+                portlet_url, video_id,
+                note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
+                fatal=False)
+            # With fatal=False a failed download does not raise; the result
+            # is then not page text, so skip it instead of regex-searching it.
+            if not portlet_code:
+                continue
+            video_iframe_url = self._search_regex(
+                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
+                default=None)
+            if video_iframe_url:
+                surl = smuggle_url(
+                    video_iframe_url, {'force_videoid': video_id})
+                return {
+                    '_type': 'url',
+                    'id': video_id,
+                    'url': surl,
+                }
+
+        raise ExtractorError('Could not find video iframe in any portlets')