clipfish.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. int_or_none,
  6. js_to_json,
  7. determine_ext,
  8. )
  9. class ClipfishIE(InfoExtractor):
  10. IE_NAME = 'clipfish'
  11. _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
  12. _TEST = {
  13. 'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
  14. 'md5': '79bc922f3e8a9097b3d68a93780fd475',
  15. 'info_dict': {
  16. 'id': '3966754',
  17. 'ext': 'mp4',
  18. 'title': 'FIFA 14 - E3 2013 Trailer',
  19. 'duration': 82,
  20. }
  21. }
  22. def _real_extract(self, url):
  23. video_id = self._match_id(url)
  24. webpage = self._download_webpage(url, video_id)
  25. video_info = self._parse_json(
  26. js_to_json(self._html_search_regex('var videoObject = ({[^}]+?})', webpage, 'videoObject')),
  27. video_id
  28. )
  29. info_url = self._parse_json(
  30. js_to_json(self._html_search_regex('var globalFlashvars = ({[^}]+?})', webpage, 'globalFlashvars')),
  31. video_id
  32. )['data']
  33. doc = self._download_xml(
  34. info_url, video_id, note='Downloading info page')
  35. title = doc.find('title').text
  36. video_url = doc.find('filename').text
  37. thumbnail = doc.find('imageurl').text
  38. duration = int_or_none(video_info['length'])
  39. formats = [{'url': video_info['videourl']},{'url': video_url}]
  40. self._sort_formats(formats)
  41. return {
  42. 'id': video_id,
  43. 'title': title,
  44. 'formats': formats,
  45. 'thumbnail': thumbnail,
  46. 'duration': duration,
  47. }