ign.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. import re
  2. import json
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. determine_ext,
  6. )
  7. class IGNIE(InfoExtractor):
  8. _VALID_URL = r'http://www.ign.com/videos/.+/(?P<name>.+)'
  9. IE_NAME = u'ign.com'
  10. _TEST = {
  11. u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
  12. u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
  13. u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
  14. u'info_dict': {
  15. u'title': u'The Last of Us Review',
  16. u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
  17. }
  18. }
  19. def _real_extract(self, url):
  20. mobj = re.match(self._VALID_URL, url)
  21. name = mobj.group('name')
  22. config_url = url + '.config'
  23. webpage = self._download_webpage(url, name)
  24. config = json.loads(self._download_webpage(config_url, name, u'Downloading video info'))
  25. self.report_extraction(name)
  26. description = self._html_search_regex(r'<span class="page-object-description">(.+?)</span>',
  27. webpage, 'video description', flags=re.DOTALL)
  28. media = config['playlist']['media']
  29. video_url = media['url']
  30. return {'id': media['metadata']['videoId'],
  31. 'url': video_url,
  32. 'ext': determine_ext(video_url),
  33. 'title': media['metadata']['title'],
  34. 'description': description,
  35. 'thumbnail': media['poster'][0]['url'].replace('{size}', 'small'),
  36. }