6 years ago · ff60ec8f02
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -12,11 +12,16 @@ from ..utils import (
 
				     ExtractorError,
			
 
				     fix_xml_ampersands,
			
 
				     int_or_none,
			
 
				+    merge_dicts,
			
 
				     orderedSet,
			
 
				     parse_duration,
			
 
				     qualities,
			
 
				+    str_or_none,
			
 
				     strip_jsonp,
			
 
				     unified_strdate,
			
 
				+    unified_timestamp,
			
 
				+    url_or_none,
			
 
				+    urlencode_postdata,
			
 
				 )
			
 
				 
			
 
				 
			
@@ -176,9 +181,118 @@ class NPOIE(NPOBaseIE):
 
				 
			
 
				     def _real_extract(self, url):
			
 
				         video_id = self._match_id(url)
			
 
				-        return self._get_info(video_id)
			
 
				+        try:
			
 
				+            return self._get_info(url, video_id)
			
 
				+        except ExtractorError:
			
 
				+            return self._get_old_info(video_id)
			
 
				+
			
 
				+    def _get_info(self, url, video_id):
			
 
				+        token = self._download_json(
			
 
				+            'https://www.npostart.nl/api/token', video_id,
			
 
				+            'Downloading token', headers={
			
 
				+                'Referer': url,
			
 
				+                'X-Requested-With': 'XMLHttpRequest',
			
 
				+            })['token']
			
 
				+
			
 
				+        player = self._download_json(
			
 
				+            'https://www.npostart.nl/player/%s' % video_id, video_id,
			
 
				+            'Downloading player JSON', data=urlencode_postdata({
			
 
				+                'autoplay': 0,
			
 
				+                'share': 1,
			
 
				+                'pageUrl': url,
			
 
				+                'hasAdConsent': 0,
			
 
				+                '_token': token,
			
 
				+            }))
			
 
				+
			
 
				+        player_token = player['token']
			
 
				+
			
 
				+        format_urls = set()
			
 
				+        formats = []
			
 
				+        for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
			
 
				+            streams = self._download_json(
			
 
				+                'https://start-player.npo.nl/video/%s/streams' % video_id,
			
 
				+                video_id, 'Downloading %s profile JSON' % profile, fatal=False,
			
 
				+                query={
			
 
				+                    'profile': profile,
			
 
				+                    'quality': 'npo',
			
 
				+                    'tokenId': player_token,
			
 
				+                    'streamType': 'broadcast',
			
 
				+                })
			
 
				+            if not streams:
			
 
				+                continue
			
 
				+            stream = streams.get('stream')
			
 
				+            if not isinstance(stream, dict):
			
 
				+                continue
			
 
				+            stream_url = url_or_none(stream.get('src'))
			
 
				+            if not stream_url or stream_url in format_urls:
			
 
				+                continue
			
 
				+            format_urls.add(stream_url)
			
 
				+            if stream.get('protection') is not None:
			
 
				+                continue
			
 
				+            stream_type = stream.get('type')
			
 
				+            stream_ext = determine_ext(stream_url)
			
 
				+            if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
			
 
				+                formats.extend(self._extract_mpd_formats(
			
 
				+                    stream_url, video_id, mpd_id='dash', fatal=False))
			
 
				+            elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
			
 
				+                formats.extend(self._extract_m3u8_formats(
			
 
				+                    stream_url, video_id, ext='mp4',
			
 
				+                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
			
 
				+            elif '.ism/Manifest' in stream_url:
			
 
				+                formats.extend(self._extract_ism_formats(
			
 
				+                    stream_url, video_id, ism_id='mss', fatal=False))
			
 
				+            else:
			
 
				+                formats.append({
			
 
				+                    'url': stream_url,
			
 
				+                })
			
 
				+
			
 
				+        self._sort_formats(formats)
			
 
				+
			
 
				+        info = {
			
 
				+            'id': video_id,
			
 
				+            'title': video_id,
			
 
				+            'formats': formats,
			
 
				+        }
			
 
				 
			
 
				-    def _get_info(self, video_id):
			
 
				+        embed_url = url_or_none(player.get('embedUrl'))
			
 
				+        if embed_url:
			
 
				+            webpage = self._download_webpage(
			
 
				+                embed_url, video_id, 'Downloading embed page', fatal=False)
			
 
				+            if webpage:
			
 
				+                video = self._parse_json(
			
 
				+                    self._search_regex(
			
 
				+                        r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
			
 
				+                        default='{}'), video_id)
			
 
				+                if video:
			
 
				+                    title = video.get('episodeTitle')
			
 
				+                    subtitles = {}
			
 
				+                    subtitles_list = video.get('subtitles')
			
 
				+                    if isinstance(subtitles_list, list):
			
 
				+                        for cc in subtitles_list:
			
 
				+                            cc_url = url_or_none(cc.get('src'))
			
 
				+                            if not cc_url:
			
 
				+                                continue
			
 
				+                            lang = str_or_none(cc.get('language')) or 'nl'
			
 
				+                            subtitles.setdefault(lang, []).append({
			
 
				+                                'url': cc_url,
			
 
				+                            })
			
 
				+                    return merge_dicts({
			
 
				+                        'title': title,
			
 
				+                        'description': video.get('description'),
			
 
				+                        'thumbnail': url_or_none(
			
 
				+                            video.get('still_image_url') or video.get('orig_image_url')),
			
 
				+                        'duration': int_or_none(video.get('duration')),
			
 
				+                        'timestamp': unified_timestamp(video.get('broadcastDate')),
			
 
				+                        'creator': video.get('channel'),
			
 
				+                        'series': video.get('title'),
			
 
				+                        'episode': title,
			
 
				+                        'episode_number': int_or_none(video.get('episodeNumber')),
			
 
				+                        'subtitles': subtitles,
			
 
				+                    }, info)
			
 
				+
			
 
				+        return info
			
 
				+
			
 
				+    def _get_old_info(self, video_id):
			
 
				         metadata = self._download_json(
			
 
				             'http://e.omroep.nl/metadata/%s' % video_id,
			
 
				             video_id,
			
@@ -280,7 +394,7 @@ class NPOIE(NPOBaseIE):
 
				             # JSON
			
 
				             else:
			
 
				                 video_url = stream_info.get('url')
			
 
				-            if not video_url or video_url in urls:
			
 
				+            if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
			
 
				                 continue
			
 
				             urls.add(video_url)
			
 
				             if determine_ext(video_url) == 'm3u8':