screenjunkies.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. from .common import (
  5. InfoExtractor,
  6. ExtractorError,
  7. )
  8. from ..utils import (
  9. int_or_none,
  10. )
  11. class ScreenJunkiesIE(InfoExtractor):
  12. _VALID_URL = r'http://www.screenjunkies.com/video/(.+-(?P<id>\d+)|.+)'
  13. _TESTS = [{
  14. 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
  15. 'info_dict': {
  16. 'id': '2841915',
  17. 'ext': 'mp4',
  18. 'title': 'Best Quentin Tarantino Movie',
  19. },
  20. }, {
  21. 'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight',
  22. 'info_dict': {
  23. 'id': '2348808',
  24. 'ext': 'mp4',
  25. 'title': "Honest Trailers - 'The Dark Knight'",
  26. },
  27. }, {
  28. 'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285',
  29. 'only_matching': True,
  30. }]
  31. def _real_extract(self, url):
  32. video_id = self._match_id(url)
  33. if not video_id: # Older urls didn't have the id in them, but we can grab it manually
  34. webpage = self._download_webpage(url, url)
  35. video_id = self._html_search_regex(r'src="/embed/(\d+)"', webpage, 'video id')
  36. webpage = self._download_webpage('http://www.screenjunkies.com/embed/%s' %video_id, video_id)
  37. video_vars_str = self._html_search_regex(r'embedVars = (\{.+\})\s*</script>', webpage, 'video variables', flags=re.S)
  38. video_vars = self._parse_json(video_vars_str, video_id)
  39. # TODO: Figure out required cookies
  40. if video_vars['subscriptionLevel'] > 0:
  41. raise ExtractorError('This video requires ScreenJunkiesPlus', expected=True)
  42. formats = []
  43. for f in video_vars['media']:
  44. if not f['mediaPurpose'] == 'play':
  45. continue
  46. formats.append({
  47. 'url': f['uri'],
  48. 'width': int_or_none(f.get('width')),
  49. 'height': int_or_none(f.get('height')),
  50. 'tbr': int_or_none(f.get('bitRate')),
  51. 'format': 'mp4',
  52. })
  53. self._sort_formats(formats)
  54. return {
  55. 'id': video_id,
  56. 'title': video_vars['contentName'],
  57. 'formats': formats,
  58. 'duration': int_or_none(video_vars.get('videoLengthInSeconds')),
  59. 'thumbnail': video_vars.get('thumbUri'),
  60. 'tags': video_vars['tags'].split(',') if 'tags' in video_vars else [],
  61. }