|  | @@ -5,19 +5,27 @@ import hashlib
 | 
											
												
													
														|  |  import re
 |  |  import re
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  from .common import InfoExtractor
 |  |  from .common import InfoExtractor
 | 
											
												
													
														|  | -from ..compat import compat_parse_qs
 |  | 
 | 
											
												
													
														|  | 
 |  | +from ..compat import (
 | 
											
												
													
														|  | 
 |  | +    compat_parse_qs,
 | 
											
												
													
														|  | 
 |  | +    compat_urlparse,
 | 
											
												
													
														|  | 
 |  | +)
 | 
											
												
													
														|  |  from ..utils import (
 |  |  from ..utils import (
 | 
											
												
													
														|  | 
 |  | +    ExtractorError,
 | 
											
												
													
														|  |      int_or_none,
 |  |      int_or_none,
 | 
											
												
													
														|  |      float_or_none,
 |  |      float_or_none,
 | 
											
												
													
														|  | 
 |  | +    parse_iso8601,
 | 
											
												
													
														|  | 
 |  | +    smuggle_url,
 | 
											
												
													
														|  | 
 |  | +    strip_jsonp,
 | 
											
												
													
														|  |      unified_timestamp,
 |  |      unified_timestamp,
 | 
											
												
													
														|  | 
 |  | +    unsmuggle_url,
 | 
											
												
													
														|  |      urlencode_postdata,
 |  |      urlencode_postdata,
 | 
											
												
													
														|  |  )
 |  |  )
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  class BiliBiliIE(InfoExtractor):
 |  |  class BiliBiliIE(InfoExtractor):
 | 
											
												
													
														|  | -    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
 |  | 
 | 
											
												
													
														|  | 
 |  | +    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -    _TEST = {
 |  | 
 | 
											
												
													
														|  | 
 |  | +    _TESTS = [{
 | 
											
												
													
														|  |          'url': 'http://www.bilibili.tv/video/av1074402/',
 |  |          'url': 'http://www.bilibili.tv/video/av1074402/',
 | 
											
												
													
														|  |          'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
 |  |          'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
 | 
											
												
													
														|  |          'info_dict': {
 |  |          'info_dict': {
 | 
											
										
											
												
													
														|  | @@ -32,25 +40,61 @@ class BiliBiliIE(InfoExtractor):
 | 
											
												
													
														|  |              'uploader': '菊子桑',
 |  |              'uploader': '菊子桑',
 | 
											
												
													
														|  |              'uploader_id': '156160',
 |  |              'uploader_id': '156160',
 | 
											
												
													
														|  |          },
 |  |          },
 | 
											
												
													
														|  | -    }
 |  | 
 | 
											
												
													
														|  | 
 |  | +    }, {
 | 
											
												
													
														|  | 
 |  | +        # Tested in BiliBiliBangumiIE
 | 
											
												
													
														|  | 
 |  | +        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
 | 
											
												
													
														|  | 
 |  | +        'only_matching': True,
 | 
											
												
													
														|  | 
 |  | +    }, {
 | 
											
												
													
														|  | 
 |  | +        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
 | 
											
												
													
														|  | 
 |  | +        'md5': '3f721ad1e75030cc06faf73587cfec57',
 | 
											
												
													
														|  | 
 |  | +        'info_dict': {
 | 
											
												
													
														|  | 
 |  | +            'id': '100643',
 | 
											
												
													
														|  | 
 |  | +            'ext': 'mp4',
 | 
											
												
													
														|  | 
 |  | +            'title': 'CHAOS;CHILD',
 | 
											
												
													
														|  | 
 |  | +            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        'skip': 'Geo-restricted to China',
 | 
											
												
													
														|  | 
 |  | +    }]
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |      _APP_KEY = '84956560bc028eb7'
 |  |      _APP_KEY = '84956560bc028eb7'
 | 
											
												
													
														|  |      _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
 |  |      _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | 
 |  | +    def _report_error(self, result):
 | 
											
												
													
														|  | 
 |  | +        if 'message' in result:
 | 
											
												
													
														|  | 
 |  | +            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
 | 
											
												
													
														|  | 
 |  | +        elif 'code' in result:
 | 
											
												
													
														|  | 
 |  | +            raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
 | 
											
												
													
														|  | 
 |  | +        else:
 | 
											
												
													
														|  | 
 |  | +            raise ExtractorError('Can\'t extract Bangumi episode ID')
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  |      def _real_extract(self, url):
 |  |      def _real_extract(self, url):
 | 
											
												
													
														|  | -        video_id = self._match_id(url)
 |  | 
 | 
											
												
													
														|  | 
 |  | +        url, smuggled_data = unsmuggle_url(url, {})
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        mobj = re.match(self._VALID_URL, url)
 | 
											
												
													
														|  | 
 |  | +        video_id = mobj.group('id')
 | 
											
												
													
														|  | 
 |  | +        anime_id = mobj.group('anime_id')
 | 
											
												
													
														|  |          webpage = self._download_webpage(url, video_id)
 |  |          webpage = self._download_webpage(url, video_id)
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -        if 'anime/v' not in url:
 |  | 
 | 
											
												
													
														|  | 
 |  | +        if 'anime/' not in url:
 | 
											
												
													
														|  |              cid = compat_parse_qs(self._search_regex(
 |  |              cid = compat_parse_qs(self._search_regex(
 | 
											
												
													
														|  |                  [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
 |  |                  [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
 | 
											
												
													
														|  |                   r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
 |  |                   r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
 | 
											
												
													
														|  |                  webpage, 'player parameters'))['cid'][0]
 |  |                  webpage, 'player parameters'))['cid'][0]
 | 
											
												
													
														|  |          else:
 |  |          else:
 | 
											
												
													
														|  | 
 |  | +            if 'no_bangumi_tip' not in smuggled_data:
 | 
											
												
													
														|  | 
 |  | +                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
 | 
											
												
													
														|  | 
 |  | +                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
 | 
											
												
													
														|  | 
 |  | +            headers = {
 | 
											
												
													
														|  | 
 |  | +                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
 | 
											
												
													
														|  | 
 |  | +            }
 | 
											
												
													
														|  | 
 |  | +            headers.update(self.geo_verification_headers())
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  |              js = self._download_json(
 |  |              js = self._download_json(
 | 
											
												
													
														|  |                  'http://bangumi.bilibili.com/web_api/get_source', video_id,
 |  |                  'http://bangumi.bilibili.com/web_api/get_source', video_id,
 | 
											
												
													
														|  |                  data=urlencode_postdata({'episode_id': video_id}),
 |  |                  data=urlencode_postdata({'episode_id': video_id}),
 | 
											
												
													
														|  | -                headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
 |  | 
 | 
											
												
													
														|  | 
 |  | +                headers=headers)
 | 
											
												
													
														|  | 
 |  | +            if 'result' not in js:
 | 
											
												
													
														|  | 
 |  | +                self._report_error(js)
 | 
											
												
													
														|  |              cid = js['result']['cid']
 |  |              cid = js['result']['cid']
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |          payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
 |  |          payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
 | 
											
										
											
												
													
														|  | @@ -58,7 +102,11 @@ class BiliBiliIE(InfoExtractor):
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |          video_info = self._download_json(
 |  |          video_info = self._download_json(
 | 
											
												
													
														|  |              'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
 |  |              'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
 | 
											
												
													
														|  | -            video_id, note='Downloading video info page')
 |  | 
 | 
											
												
													
														|  | 
 |  | +            video_id, note='Downloading video info page',
 | 
											
												
													
														|  | 
 |  | +            headers=self.geo_verification_headers())
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        if 'durl' not in video_info:
 | 
											
												
													
														|  | 
 |  | +            self._report_error(video_info)
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |          entries = []
 |  |          entries = []
 | 
											
												
													
														|  |  
 |  |  
 | 
											
										
											
												
													
														|  | @@ -85,7 +133,7 @@ class BiliBiliIE(InfoExtractor):
 | 
											
												
													
														|  |          title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
 |  |          title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
 | 
											
												
													
														|  |          description = self._html_search_meta('description', webpage)
 |  |          description = self._html_search_meta('description', webpage)
 | 
											
												
													
														|  |          timestamp = unified_timestamp(self._html_search_regex(
 |  |          timestamp = unified_timestamp(self._html_search_regex(
 | 
											
												
													
														|  | -            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
 |  | 
 | 
											
												
													
														|  | 
 |  | +            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
 | 
											
												
													
														|  |          thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
 |  |          thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |          # TODO 'view_count' requires deobfuscating Javascript
 |  |          # TODO 'view_count' requires deobfuscating Javascript
 | 
											
										
											
												
													
														|  | @@ -99,7 +147,7 @@ class BiliBiliIE(InfoExtractor):
 | 
											
												
													
														|  |          }
 |  |          }
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |          uploader_mobj = re.search(
 |  |          uploader_mobj = re.search(
 | 
											
												
													
														|  | -            r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
 |  | 
 | 
											
												
													
														|  | 
 |  | +            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
 | 
											
												
													
														|  |              webpage)
 |  |              webpage)
 | 
											
												
													
														|  |          if uploader_mobj:
 |  |          if uploader_mobj:
 | 
											
												
													
														|  |              info.update({
 |  |              info.update({
 | 
											
										
											
												
													
														|  | @@ -123,3 +171,70 @@ class BiliBiliIE(InfoExtractor):
 | 
											
												
													
														|  |                  'description': description,
 |  |                  'description': description,
 | 
											
												
													
														|  |                  'entries': entries,
 |  |                  'entries': entries,
 | 
											
												
													
														|  |              }
 |  |              }
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +class BiliBiliBangumiIE(InfoExtractor):
 | 
											
												
													
														|  | 
 |  | +    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    IE_NAME = 'bangumi.bilibili.com'
 | 
											
												
													
														|  | 
 |  | +    IE_DESC = 'BiliBili番剧'
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    _TESTS = [{
 | 
											
												
													
														|  | 
 |  | +        'url': 'http://bangumi.bilibili.com/anime/1869',
 | 
											
												
													
														|  | 
 |  | +        'info_dict': {
 | 
											
												
													
														|  | 
 |  | +            'id': '1869',
 | 
											
												
													
														|  | 
 |  | +            'title': '混沌武士',
 | 
											
												
													
														|  | 
 |  | +            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        'playlist_count': 26,
 | 
											
												
													
														|  | 
 |  | +    }, {
 | 
											
												
													
														|  | 
 |  | +        'url': 'http://bangumi.bilibili.com/anime/1869',
 | 
											
												
													
														|  | 
 |  | +        'info_dict': {
 | 
											
												
													
														|  | 
 |  | +            'id': '1869',
 | 
											
												
													
														|  | 
 |  | +            'title': '混沌武士',
 | 
											
												
													
														|  | 
 |  | +            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +        'playlist': [{
 | 
											
												
													
														|  | 
 |  | +            'md5': '91da8621454dd58316851c27c68b0c13',
 | 
											
												
													
														|  | 
 |  | +            'info_dict': {
 | 
											
												
													
														|  | 
 |  | +                'id': '40062',
 | 
											
												
													
														|  | 
 |  | +                'ext': 'mp4',
 | 
											
												
													
														|  | 
 |  | +                'title': '混沌武士',
 | 
											
												
													
														|  | 
 |  | +                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
 | 
											
												
													
														|  | 
 |  | +                'timestamp': 1414538739,
 | 
											
												
													
														|  | 
 |  | +                'upload_date': '20141028',
 | 
											
												
													
														|  | 
 |  | +                'episode': '疾风怒涛 Tempestuous Temperaments',
 | 
											
												
													
														|  | 
 |  | +                'episode_number': 1,
 | 
											
												
													
														|  | 
 |  | +            },
 | 
											
												
													
														|  | 
 |  | +        }],
 | 
											
												
													
														|  | 
 |  | +        'params': {
 | 
											
												
													
														|  | 
 |  | +            'playlist_items': '1',
 | 
											
												
													
														|  | 
 |  | +        },
 | 
											
												
													
														|  | 
 |  | +    }]
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    @classmethod
 | 
											
												
													
														|  | 
 |  | +    def suitable(cls, url):
 | 
											
												
													
														|  | 
 |  | +        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    def _real_extract(self, url):
 | 
											
												
													
														|  | 
 |  | +        bangumi_id = self._match_id(url)
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        # Sometimes this API returns a JSONP response
 | 
											
												
													
														|  | 
 |  | +        season_info = self._download_json(
 | 
											
												
													
														|  | 
 |  | +            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
 | 
											
												
													
														|  | 
 |  | +            bangumi_id, transform_source=strip_jsonp)['result']
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        entries = [{
 | 
											
												
													
														|  | 
 |  | +            '_type': 'url_transparent',
 | 
											
												
													
														|  | 
 |  | +            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
 | 
											
												
													
														|  | 
 |  | +            'ie_key': BiliBiliIE.ie_key(),
 | 
											
												
													
														|  | 
 |  | +            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
 | 
											
												
													
														|  | 
 |  | +            'episode': episode.get('index_title'),
 | 
											
												
													
														|  | 
 |  | +            'episode_number': int_or_none(episode.get('index')),
 | 
											
												
													
														|  | 
 |  | +        } for episode in season_info['episodes']]
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +        return self.playlist_result(
 | 
											
												
													
														|  | 
 |  | +            entries, bangumi_id,
 | 
											
												
													
														|  | 
 |  | +            season_info.get('bangumi_title'), season_info.get('evaluate'))
 |