13 年之前 · 51661d8600
--- a/youtube-dl
+++ b/youtube-dl
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -13,6 +13,8 @@ import urllib
 
															 import urllib2
														
 
															 import email.utils
														
 
															 import xml.etree.ElementTree
														
 
															+import random
														
 
															+import math
														
 
															 from urlparse import parse_qs
														
 
															 try:
														
@@ -2955,3 +2957,133 @@ class MTVIE(InfoExtractor):
 
															 		}
														
 
															 		return [info]
														
 
															+
														
 
															+
														
 
															+
														
 
															+class YoukuIE(InfoExtractor):
														
 
															+
														
 
															+	_VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
														
 
															+	IE_NAME = u'Youku'
														
 
															+
														
 
															+	def __init__(self, downloader=None):
														
 
															+		InfoExtractor.__init__(self, downloader)
														
 
															+
														
 
															+	def report_download_webpage(self, file_id):
														
 
															+		"""Report webpage download."""
														
 
															+		self._downloader.to_screen(u'[Youku] %s: Downloading webpage' % file_id)
														
 
															+
														
 
															+	def report_extraction(self, file_id):
														
 
															+		"""Report information extraction."""
														
 
															+		self._downloader.to_screen(u'[Youku] %s: Extracting information' % file_id)
														
 
															+
														
 
															+	def _gen_sid(self):
														
 
															+		nowTime = int(time.time() * 1000)
														
 
															+		random1 = random.randint(1000,1998)
														
 
															+		random2 = random.randint(1000,9999)
														
 
															+
														
 
															+		return "%d%d%d" %(nowTime,random1,random2)
														
 
															+
														
 
															+	def _get_file_ID_mix_string(self, seed):
														
 
															+		mixed = []
														
 
															+		source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
														
 
															+		seed = float(seed)
														
 
															+		for i in range(len(source)):
														
 
															+			seed  =  (seed * 211 + 30031 ) % 65536
														
 
															+			index  =  math.floor(seed / 65536 * len(source) )
														
 
															+			mixed.append(source[int(index)])
														
 
															+			source.remove(source[int(index)])
														
 
															+		#return ''.join(mixed)
														
 
															+		return mixed
														
 
															+
														
 
															+
														
 
															+	def _get_file_id(self, fileId, seed):
														
 
															+		mixed = self._get_file_ID_mix_string(seed)
														
 
															+		ids = fileId.split('*')
														
 
															+		realId = []
														
 
															+		for ch in ids:
														
 
															+			if ch is not '':
														
 
															+				realId.append(mixed[int(ch)])
														
 
															+		return ''.join(realId)
														
 
															+
														
 
															+	def _gen_key(self, key1, key2):
														
 
															+		pass
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+	def _real_extract(self, url):
														
 
															+		mobj = re.match(self._VALID_URL, url)
														
 
															+		if mobj is None:
														
 
															+			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
														
 
															+			return
														
 
															+		video_id = mobj.group('ID')
														
 
															+
														
 
															+		info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
														
 
															+
														
 
															+		request = urllib2.Request(info_url, None, std_headers)
														
 
															+		try:
														
 
															+			self.report_download_webpage(video_id)
														
 
															+			jsondata = urllib2.urlopen(request).read()
														
 
															+		except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
														
 
															+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
														
 
															+			return
														
 
															+
														
 
															+		self.report_extraction(video_id)
														
 
															+		try:
														
 
															+			config = json.loads(jsondata)
														
 
															+
														
 
															+			video_title =  config['data'][0]['title']
														
 
															+			seed = config['data'][0]['seed']
														
 
															+
														
 
															+			format = self._downloader.params.get('format', None)
														
 
															+			supported_format = config['data'][0]['streamfileids'].keys()
														
 
															+
														
 
															+			if format is None or format == 'best':
														
 
															+				if 'hd2' in supported_format:
														
 
															+					format = 'hd2'
														
 
															+				else:
														
 
															+					format = 'flv'
														
 
															+				ext = u'flv'
														
 
															+			elif format == 'worst':
														
 
															+				format = 'mp4'
														
 
															+				ext = u'mp4'
														
 
															+			else:
														
 
															+				format = 'flv'
														
 
															+				ext = u'flv'
														
 
															+
														
 
															+
														
 
															+			fileid = config['data'][0]['streamfileids'][format]
														
 
															+			seg_number = len(config['data'][0]['segs'][format])
														
 
															+
														
 
															+			keys=[]
														
 
															+			for i in xrange(seg_number):
														
 
															+				keys.append(config['data'][0]['segs'][format][i]['k'])
														
 
															+
														
 
															+			#TODO check error
														
 
															+			#youku only could be viewed from mainland china
														
 
															+		except:
														
 
															+			self._downloader.trouble(u'ERROR: unable to extract info section')
														
 
															+			return
														
 
															+
														
 
															+		files_info=[]
														
 
															+		sid = self._gen_sid()
														
 
															+		fileid = self._get_file_id(fileid, seed)
														
 
															+
														
 
															+		#column 8,9 of fileid represent the segment number
														
 
															+		#fileid[7:9] should be changed
														
 
															+		for index, key in enumerate(keys):
														
 
															+
														
 
															+			temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
														
 
															+			download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
														
 
															+			print download_url
														
 
															+			info = {
														
 
															+				'id': '%s_part%02d' % (video_id, index),
														
 
															+				'url': download_url,
														
 
															+				'uploader': None,
														
 
															+				'title': video_title,
														
 
															+				'ext': ext,
														
 
															+				'format': u'NA'
														
 
															+			}
														
 
															+			files_info.append(info)
														
 
															+
														
 
															+		return files_info
														
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -351,6 +351,7 @@ def gen_extractors():
 
															 		MixcloudIE(),
														
 
															 		StanfordOpenClassroomIE(),
														
 
															 		MTVIE(),
														
 
															+		YoukuIE(),
														
 
															 		GenericIE()
														
 
															 	]