gdcvault.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. import xml.etree.ElementTree
  5. from .common import InfoExtractor
  6. from ..utils import unified_strdate
  7. class GDCVaultIE(InfoExtractor):
  8. _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
  9. _TEST = {
  10. u'url': u'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
  11. u'md5': u'7ce8388f544c88b7ac11c7ab1b593704',
  12. u'info_dict': {
  13. u"id": u"1019721",
  14. u"ext": u"mp4",
  15. u"title": u"Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)"
  16. }
  17. }
  18. def _real_extract(self, url):
  19. mobj = re.match(self._VALID_URL, url)
  20. video_id = mobj.group('id')
  21. webpage_url = 'http://www.gdcvault.com/play/' + video_id
  22. start_page = self._download_webpage(webpage_url, video_id)
  23. self.report_extraction(video_id)
  24. xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
  25. xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
  26. xml_decription_url = xml_root + 'xml/' + xml_name
  27. xml_description = self._download_xml(xml_decription_url, video_id)
  28. video_title = xml_description.find('./metadata/title').text
  29. mp4_video = xml_description.find('./metadata/mp4video').text
  30. mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
  31. video_root = mobj.group('root')
  32. formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
  33. video_formats = []
  34. for format in formats:
  35. mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
  36. url = video_root + mobj.group('path')
  37. vbr = format.find('bitrate').text
  38. video_formats.append({
  39. 'url': url,
  40. 'vbr': int(vbr),
  41. })
  42. return [{
  43. 'id': video_id,
  44. 'formats': video_formats,
  45. 'title': video_title,
  46. }]