youku.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. import json
  2. import math
  3. import random
  4. import re
  5. import time
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. ExtractorError,
  9. )
  10. class YoukuIE(InfoExtractor):
  11. _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
  12. def _gen_sid(self):
  13. nowTime = int(time.time() * 1000)
  14. random1 = random.randint(1000,1998)
  15. random2 = random.randint(1000,9999)
  16. return "%d%d%d" %(nowTime,random1,random2)
  17. def _get_file_ID_mix_string(self, seed):
  18. mixed = []
  19. source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
  20. seed = float(seed)
  21. for i in range(len(source)):
  22. seed = (seed * 211 + 30031 ) % 65536
  23. index = math.floor(seed / 65536 * len(source) )
  24. mixed.append(source[int(index)])
  25. source.remove(source[int(index)])
  26. #return ''.join(mixed)
  27. return mixed
  28. def _get_file_id(self, fileId, seed):
  29. mixed = self._get_file_ID_mix_string(seed)
  30. ids = fileId.split('*')
  31. realId = []
  32. for ch in ids:
  33. if ch:
  34. realId.append(mixed[int(ch)])
  35. return ''.join(realId)
  36. def _real_extract(self, url):
  37. mobj = re.match(self._VALID_URL, url)
  38. if mobj is None:
  39. raise ExtractorError(u'Invalid URL: %s' % url)
  40. video_id = mobj.group('ID')
  41. info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
  42. jsondata = self._download_webpage(info_url, video_id)
  43. self.report_extraction(video_id)
  44. try:
  45. config = json.loads(jsondata)
  46. video_title = config['data'][0]['title']
  47. seed = config['data'][0]['seed']
  48. format = self._downloader.params.get('format', None)
  49. supported_format = list(config['data'][0]['streamfileids'].keys())
  50. if format is None or format == 'best':
  51. if 'hd2' in supported_format:
  52. format = 'hd2'
  53. else:
  54. format = 'flv'
  55. ext = u'flv'
  56. elif format == 'worst':
  57. format = 'mp4'
  58. ext = u'mp4'
  59. else:
  60. format = 'flv'
  61. ext = u'flv'
  62. fileid = config['data'][0]['streamfileids'][format]
  63. keys = [s['k'] for s in config['data'][0]['segs'][format]]
  64. except (UnicodeDecodeError, ValueError, KeyError):
  65. raise ExtractorError(u'Unable to extract info section')
  66. files_info=[]
  67. sid = self._gen_sid()
  68. fileid = self._get_file_id(fileid, seed)
  69. #column 8,9 of fileid represent the segment number
  70. #fileid[7:9] should be changed
  71. for index, key in enumerate(keys):
  72. temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
  73. download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
  74. info = {
  75. 'id': '%s_part%02d' % (video_id, index),
  76. 'url': download_url,
  77. 'uploader': None,
  78. 'upload_date': None,
  79. 'title': video_title,
  80. 'ext': ext,
  81. }
  82. files_info.append(info)
  83. return files_info