|  | @@ -4,28 +4,35 @@ from __future__ import unicode_literals
 | 
	
		
			
				|  |  |  import re
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  from .common import InfoExtractor
 | 
	
		
			
				|  |  | -from ..compat import compat_urllib_parse_urlencode
 | 
	
		
			
				|  |  | -from ..utils import sanitized_Request
 | 
	
		
			
				|  |  | +from ..utils import (
 | 
	
		
			
				|  |  | +    HEADRequest,
 | 
	
		
			
				|  |  | +    ExtractorError,
 | 
	
		
			
				|  |  | +    int_or_none,
 | 
	
		
			
				|  |  | +    update_url_query,
 | 
	
		
			
				|  |  | +    qualities,
 | 
	
		
			
				|  |  | +    get_element_by_attribute,
 | 
	
		
			
				|  |  | +    clean_html,
 | 
	
		
			
				|  |  | +)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  class SinaIE(InfoExtractor):
 | 
	
		
			
				|  |  | -    _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
 | 
	
		
			
				|  |  | -                        (
 | 
	
		
			
				|  |  | -                            (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
 | 
	
		
			
				|  |  | -                            |
 | 
	
		
			
				|  |  | +    _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
 | 
	
		
			
				|  |  | +                        (?:
 | 
	
		
			
				|  |  | +                            (?:view/|.*\#)(?P<video_id>\d+)|
 | 
	
		
			
				|  |  | +                            .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
 | 
	
		
			
				|  |  |                              # This is used by external sites like Weibo
 | 
	
		
			
				|  |  | -                            (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
 | 
	
		
			
				|  |  | +                            api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
 | 
	
		
			
				|  |  |                          )
 | 
	
		
			
				|  |  |                    '''
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      _TESTS = [
 | 
	
		
			
				|  |  |          {
 | 
	
		
			
				|  |  | -            'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
 | 
	
		
			
				|  |  | -            'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
 | 
	
		
			
				|  |  | +            'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622',
 | 
	
		
			
				|  |  | +            'md5': 'd38433e2fc886007729735650ae4b3e9',
 | 
	
		
			
				|  |  |              'info_dict': {
 | 
	
		
			
				|  |  | -                'id': '110028898',
 | 
	
		
			
				|  |  | -                'ext': 'flv',
 | 
	
		
			
				|  |  | -                'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
 | 
	
		
			
				|  |  | +                'id': '250576622',
 | 
	
		
			
				|  |  | +                'ext': 'mp4',
 | 
	
		
			
				|  |  | +                'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  |          },
 | 
	
		
			
				|  |  |          {
 | 
	
	
		
			
				|  | @@ -35,37 +42,74 @@ class SinaIE(InfoExtractor):
 | 
	
		
			
				|  |  |                  'ext': 'flv',
 | 
	
		
			
				|  |  |                  'title': '军方提高对朝情报监视级别',
 | 
	
		
			
				|  |  |              },
 | 
	
		
			
				|  |  | +            'skip': 'the page does not exist or has been deleted',
 | 
	
		
			
				|  |  | +        },
 | 
	
		
			
				|  |  | +        {
 | 
	
		
			
				|  |  | +            'url': 'http://video.sina.com.cn/view/250587748.html',
 | 
	
		
			
				|  |  | +            'md5': '3d1807a25c775092aab3bc157fff49b4',
 | 
	
		
			
				|  |  | +            'info_dict': {
 | 
	
		
			
				|  |  | +                'id': '250587748',
 | 
	
		
			
				|  |  | +                'ext': 'mp4',
 | 
	
		
			
				|  |  | +                'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光',
 | 
	
		
			
				|  |  | +            },
 | 
	
		
			
				|  |  |          },
 | 
	
		
			
				|  |  |      ]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    def _extract_video(self, video_id):
 | 
	
		
			
				|  |  | -        data = compat_urllib_parse_urlencode({'vid': video_id})
 | 
	
		
			
				|  |  | -        url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
 | 
	
		
			
				|  |  | -                                     video_id, 'Downloading video url')
 | 
	
		
			
				|  |  | -        image_page = self._download_webpage(
 | 
	
		
			
				|  |  | -            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
 | 
	
		
			
				|  |  | -            video_id, 'Downloading thumbnail info')
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        return {'id': video_id,
 | 
	
		
			
				|  |  | -                'url': url_doc.find('./durl/url').text,
 | 
	
		
			
				|  |  | -                'ext': 'flv',
 | 
	
		
			
				|  |  | -                'title': url_doc.find('./vname').text,
 | 
	
		
			
				|  |  | -                'thumbnail': image_page.split('=')[1],
 | 
	
		
			
				|  |  | -                }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      def _real_extract(self, url):
 | 
	
		
			
				|  |  |          mobj = re.match(self._VALID_URL, url)
 | 
	
		
			
				|  |  | -        video_id = mobj.group('id')
 | 
	
		
			
				|  |  | -        if mobj.group('token') is not None:
 | 
	
		
			
				|  |  | -            # The video id is in the redirected url
 | 
	
		
			
				|  |  | -            self.to_screen('Getting video id')
 | 
	
		
			
				|  |  | -            request = sanitized_Request(url)
 | 
	
		
			
				|  |  | -            request.get_method = lambda: 'HEAD'
 | 
	
		
			
				|  |  | -            (_, urlh) = self._download_webpage_handle(request, 'NA', False)
 | 
	
		
			
				|  |  | -            return self._real_extract(urlh.geturl())
 | 
	
		
			
				|  |  | -        elif video_id is None:
 | 
	
		
			
				|  |  | -            pseudo_id = mobj.group('pseudo_id')
 | 
	
		
			
				|  |  | -            webpage = self._download_webpage(url, pseudo_id)
 | 
	
		
			
				|  |  | -            video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id')
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        return self._extract_video(video_id)
 | 
	
		
			
				|  |  | +        video_id = mobj.group('video_id')
 | 
	
		
			
				|  |  | +        if not video_id:
 | 
	
		
			
				|  |  | +            if mobj.group('token') is not None:
 | 
	
		
			
				|  |  | +                # The video id is in the redirected url
 | 
	
		
			
				|  |  | +                self.to_screen('Getting video id')
 | 
	
		
			
				|  |  | +                request = HEADRequest(url)
 | 
	
		
			
				|  |  | +                (_, urlh) = self._download_webpage_handle(request, 'NA', False)
 | 
	
		
			
				|  |  | +                return self._real_extract(urlh.geturl())
 | 
	
		
			
				|  |  | +            else:
 | 
	
		
			
				|  |  | +                pseudo_id = mobj.group('pseudo_id')
 | 
	
		
			
				|  |  | +                webpage = self._download_webpage(url, pseudo_id)
 | 
	
		
			
				|  |  | +                error = get_element_by_attribute('class', 'errtitle', webpage)
 | 
	
		
			
				|  |  | +                if error:
 | 
	
		
			
				|  |  | +                    raise ExtractorError('%s said: %s' % (
 | 
	
		
			
				|  |  | +                        self.IE_NAME, clean_html(error)), expected=True)
 | 
	
		
			
				|  |  | +                video_id = self._search_regex(
 | 
	
		
			
				|  |  | +                    r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        video_data = self._download_json(
 | 
	
		
			
				|  |  | +            'http://s.video.sina.com.cn/video/h5play',
 | 
	
		
			
				|  |  | +            video_id, query={'video_id': video_id})
 | 
	
		
			
				|  |  | +        if video_data['code'] != 1:
 | 
	
		
			
				|  |  | +            raise ExtractorError('%s said: %s' % (
 | 
	
		
			
				|  |  | +                self.IE_NAME, video_data['message']), expected=True)
 | 
	
		
			
				|  |  | +        else:
 | 
	
		
			
				|  |  | +            video_data = video_data['data']
 | 
	
		
			
				|  |  | +            title = video_data['title']
 | 
	
		
			
				|  |  | +            description = video_data.get('description')
 | 
	
		
			
				|  |  | +            if description:
 | 
	
		
			
				|  |  | +                description = description.strip()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +            preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd'])
 | 
	
		
			
				|  |  | +            formats = []
 | 
	
		
			
				|  |  | +            for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items():
 | 
	
		
			
				|  |  | +                file_api = quality.get('file_api')
 | 
	
		
			
				|  |  | +                file_id = quality.get('file_id')
 | 
	
		
			
				|  |  | +                if not file_api or not file_id:
 | 
	
		
			
				|  |  | +                    continue
 | 
	
		
			
				|  |  | +                formats.append({
 | 
	
		
			
				|  |  | +                    'format_id': quality_id,
 | 
	
		
			
				|  |  | +                    'url': update_url_query(file_api, {'vid': file_id}),
 | 
	
		
			
				|  |  | +                    'preference': preference(quality_id),
 | 
	
		
			
				|  |  | +                    'ext': 'mp4',
 | 
	
		
			
				|  |  | +                })
 | 
	
		
			
				|  |  | +            self._sort_formats(formats)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +            return {
 | 
	
		
			
				|  |  | +                'id': video_id,
 | 
	
		
			
				|  |  | +                'title': title,
 | 
	
		
			
				|  |  | +                'description': description,
 | 
	
		
			
				|  |  | +                'thumbnail': video_data.get('image'),
 | 
	
		
			
				|  |  | +                'duration': int_or_none(video_data.get('length')),
 | 
	
		
			
				|  |  | +                'timestamp': int_or_none(video_data.get('create_time')),
 | 
	
		
			
				|  |  | +                'formats': formats,
 | 
	
		
			
				|  |  | +            }
 |