collegehumor.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. import json
  2. import re
  3. from .common import InfoExtractor
  4. class CollegeHumorIE(InfoExtractor):
  5. _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
  6. _TESTS = [{
  7. u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
  8. u'file': u'6902724.mp4',
  9. u'md5': u'dcc0f5c1c8be98dc33889a191f4c26bd',
  10. u'info_dict': {
  11. u'title': u'Comic-Con Cosplay Catastrophe',
  12. u'description': u'Fans get creative this year at San Diego. Too',
  13. },
  14. },
  15. {
  16. u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
  17. u'file': u'3505939.mp4',
  18. u'md5': u'72fa701d8ef38664a4dbb9e2ab721816',
  19. u'info_dict': {
  20. u'title': u'Font Conference',
  21. u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
  22. },
  23. }]
  24. def _real_extract(self, url):
  25. mobj = re.match(self._VALID_URL, url)
  26. video_id = mobj.group('videoid')
  27. jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
  28. data = json.loads(self._download_webpage(
  29. jsonUrl, video_id, u'Downloading info JSON'))
  30. vdata = data['video']
  31. PREFS = {'high_quality': 2, 'low_quality': 0}
  32. formats = []
  33. for format_key in ('mp4', 'webm'):
  34. for qname, qurl in vdata[format_key].items():
  35. formats.append({
  36. 'format_id': format_key + '_' + qname,
  37. 'url': qurl,
  38. 'format': format_key,
  39. 'preference': PREFS.get(qname),
  40. })
  41. self._sort_formats(formats)
  42. return {
  43. 'id': video_id,
  44. 'title': vdata['title'],
  45. 'description': vdata.get('description'),
  46. 'thumbnail': vdata.get('thumbnail'),
  47. 'formats': formats,
  48. }