7 年之前 · 4b3ee09886
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -770,6 +770,7 @@ from .nrk import (
 
															     NRKTVDirekteIE,
														
 
															     NRKTVEpisodeIE,
														
 
															     NRKTVEpisodesIE,
														
 
															+    NRKTVSeasonIE,
														
 
															     NRKTVSeriesIE,
														
 
															 )
														
 
															 from .ntvde import NTVDeIE
														
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -4,13 +4,18 @@ from __future__ import unicode_literals
 
															 import re
														
 
															 from .common import InfoExtractor
														
 
															-from ..compat import compat_urllib_parse_unquote
														
 
															+from ..compat import (
														
 
															+    compat_str,
														
 
															+    compat_urllib_parse_unquote,
														
 
															+)
														
 
															 from ..utils import (
														
 
															     ExtractorError,
														
 
															     int_or_none,
														
 
															     JSON_LD_RE,
														
 
															+    NO_DEFAULT,
														
 
															     parse_age_limit,
														
 
															     parse_duration,
														
 
															+    try_get,
														
 
															 )
														
@@ -394,6 +399,148 @@ class NRKTVEpisodeIE(InfoExtractor):
 
															             'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
														
 
															+class NRKTVSerieBaseIE(InfoExtractor):
														
 
															+    def _extract_series(self, webpage, display_id, fatal=True):
														
 
															+        config = self._parse_json(
														
 
															+            self._search_regex(
														
 
															+                r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
														
 
															+                default='{}' if not fatal else NO_DEFAULT),
														
 
															+            display_id, fatal=False)
														
 
															+        if not config:
														
 
															+            return
														
 
															+        return try_get(config, lambda x: x['series'], dict)
														
 
															+
														
 
															+    def _extract_episodes(self, season):
														
 
															+        entries = []
														
 
															+        if not isinstance(season, dict):
														
 
															+            return entries
														
 
															+        episodes = season.get('episodes')
														
 
															+        if not isinstance(episodes, list):
														
 
															+            return entries
														
 
															+        for episode in episodes:
														
 
															+            nrk_id = episode.get('prfId')
														
 
															+            if not nrk_id or not isinstance(nrk_id, compat_str):
														
 
															+                continue
														
 
															+            entries.append(self.url_result(
														
 
															+                'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
														
 
															+        return entries
														
 
															+
														
 
															+
														
 
															+class NRKTVSeasonIE(NRKTVSerieBaseIE):
														
 
															+    _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
														
 
															+    _TEST = {
														
 
															+        'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
														
 
															+        'info_dict': {
														
 
															+            'id': '1',
														
 
															+            'title': 'Sesong 1',
														
 
															+        },
														
 
															+        'playlist_mincount': 30,
														
 
															+    }
														
 
															+
														
 
															+    @classmethod
														
 
															+    def suitable(cls, url):
														
 
															+        return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
														
 
															+                else super(NRKTVSeasonIE, cls).suitable(url))
														
 
															+
														
 
															+    def _real_extract(self, url):
														
 
															+        display_id = self._match_id(url)
														
 
															+
														
 
															+        webpage = self._download_webpage(url, display_id)
														
 
															+
														
 
															+        series = self._extract_series(webpage, display_id)
														
 
															+
														
 
															+        season = next(
														
 
															+            s for s in series['seasons']
														
 
															+            if int(display_id) == s.get('seasonNumber'))
														
 
															+
														
 
															+        title = try_get(season, lambda x: x['titles']['title'], compat_str)
														
 
															+        return self.playlist_result(
														
 
															+            self._extract_episodes(season), display_id, title)
														
 
															+
														
 
															+
														
 
															+class NRKTVSeriesIE(NRKTVSerieBaseIE):
														
 
															+    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
														
 
															+    _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
														
 
															+    _TESTS = [{
														
 
															+        # new layout
														
 
															+        'url': 'https://tv.nrk.no/serie/backstage',
														
 
															+        'info_dict': {
														
 
															+            'id': 'backstage',
														
 
															+            'title': 'Backstage',
														
 
															+            'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
														
 
															+        },
														
 
															+        'playlist_mincount': 60,
														
 
															+    }, {
														
 
															+        # old layout
														
 
															+        'url': 'https://tv.nrk.no/serie/groenn-glede',
														
 
															+        'info_dict': {
														
 
															+            'id': 'groenn-glede',
														
 
															+            'title': 'Grønn glede',
														
 
															+            'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
														
 
															+        },
														
 
															+        'playlist_mincount': 9,
														
 
															+    }, {
														
 
															+        'url': 'http://tv.nrksuper.no/serie/labyrint',
														
 
															+        'info_dict': {
														
 
															+            'id': 'labyrint',
														
 
															+            'title': 'Labyrint',
														
 
															+            'description': 'md5:58afd450974c89e27d5a19212eee7115',
														
 
															+        },
														
 
															+        'playlist_mincount': 3,
														
 
															+    }, {
														
 
															+        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
														
 
															+        'only_matching': True,
														
 
															+    }, {
														
 
															+        'url': 'https://tv.nrk.no/serie/saving-the-human-race',
														
 
															+        'only_matching': True,
														
 
															+    }, {
														
 
															+        'url': 'https://tv.nrk.no/serie/postmann-pat',
														
 
															+        'only_matching': True,
														
 
															+    }]
														
 
															+
														
 
															+    @classmethod
														
 
															+    def suitable(cls, url):
														
 
															+        return (
														
 
															+            False if any(ie.suitable(url)
														
 
															+                         for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
														
 
															+            else super(NRKTVSeriesIE, cls).suitable(url))
														
 
															+
														
 
															+    def _real_extract(self, url):
														
 
															+        series_id = self._match_id(url)
														
 
															+
														
 
															+        webpage = self._download_webpage(url, series_id)
														
 
															+
														
 
															+        # New layout (e.g. https://tv.nrk.no/serie/backstage)
														
 
															+        series = self._extract_series(webpage, series_id, fatal=False)
														
 
															+        if series:
														
 
															+            title = try_get(series, lambda x: x['titles']['title'], compat_str)
														
 
															+            description = try_get(
														
 
															+                series, lambda x: x['titles']['subtitle'], compat_str)
														
 
															+            entries = []
														
 
															+            for season in series['seasons']:
														
 
															+                entries.extend(self._extract_episodes(season))
														
 
															+            return self.playlist_result(entries, series_id, title, description)
														
 
															+
														
 
															+        # Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
														
 
															+        entries = [
														
 
															+            self.url_result(
														
 
															+                'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
														
 
															+                    series=series_id, season=season_id))
														
 
															+            for season_id in re.findall(self._ITEM_RE, webpage)
														
 
															+        ]
														
 
															+
														
 
															+        title = self._html_search_meta(
														
 
															+            'seriestitle', webpage,
														
 
															+            'title', default=None) or self._og_search_title(
														
 
															+            webpage, fatal=False)
														
 
															+
														
 
															+        description = self._html_search_meta(
														
 
															+            'series_description', webpage,
														
 
															+            'description', default=None) or self._og_search_description(webpage)
														
 
															+
														
 
															+        return self.playlist_result(entries, series_id, title, description)
														
 
															+
														
 
															+
														
 
															 class NRKTVDirekteIE(NRKTVIE):
														
 
															     IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
														
 
															     _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
														
@@ -473,65 +620,6 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
 
															             r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
														
 
															-class NRKTVSeriesIE(InfoExtractor):
														
 
															-    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
														
 
															-    _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
														
 
															-    _TESTS = [{
														
 
															-        'url': 'https://tv.nrk.no/serie/groenn-glede',
														
 
															-        'info_dict': {
														
 
															-            'id': 'groenn-glede',
														
 
															-            'title': 'Grønn glede',
														
 
															-            'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
														
 
															-        },
														
 
															-        'playlist_mincount': 9,
														
 
															-    }, {
														
 
															-        'url': 'http://tv.nrksuper.no/serie/labyrint',
														
 
															-        'info_dict': {
														
 
															-            'id': 'labyrint',
														
 
															-            'title': 'Labyrint',
														
 
															-            'description': 'md5:58afd450974c89e27d5a19212eee7115',
														
 
															-        },
														
 
															-        'playlist_mincount': 3,
														
 
															-    }, {
														
 
															-        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
														
 
															-        'only_matching': True,
														
 
															-    }, {
														
 
															-        'url': 'https://tv.nrk.no/serie/saving-the-human-race',
														
 
															-        'only_matching': True,
														
 
															-    }, {
														
 
															-        'url': 'https://tv.nrk.no/serie/postmann-pat',
														
 
															-        'only_matching': True,
														
 
															-    }]
														
 
															-
														
 
															-    @classmethod
														
 
															-    def suitable(cls, url):
														
 
															-        return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
														
 
															-                else super(NRKTVSeriesIE, cls).suitable(url))
														
 
															-
														
 
															-    def _real_extract(self, url):
														
 
															-        series_id = self._match_id(url)
														
 
															-
														
 
															-        webpage = self._download_webpage(url, series_id)
														
 
															-
														
 
															-        entries = [
														
 
															-            self.url_result(
														
 
															-                'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
														
 
															-                    series=series_id, season=season_id))
														
 
															-            for season_id in re.findall(self._ITEM_RE, webpage)
														
 
															-        ]
														
 
															-
														
 
															-        title = self._html_search_meta(
														
 
															-            'seriestitle', webpage,
														
 
															-            'title', default=None) or self._og_search_title(
														
 
															-            webpage, fatal=False)
														
 
															-
														
 
															-        description = self._html_search_meta(
														
 
															-            'series_description', webpage,
														
 
															-            'description', default=None) or self._og_search_description(webpage)
														
 
															-
														
 
															-        return self.playlist_result(entries, series_id, title, description)
														
 
															-
														
 
															-
														
 
															 class NRKSkoleIE(InfoExtractor):
														
 
															     IE_DESC = 'NRK Skole'
														
 
															     _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'