promptfile.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. determine_ext,
  7. ExtractorError,
  8. sanitized_Request,
  9. urlencode_postdata,
  10. )
  11. class PromptFileIE(InfoExtractor):
  12. _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
  13. _TEST = {
  14. 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416',
  15. 'md5': '5a7e285a26e0d66d9a263fae91bc92ce',
  16. 'info_dict': {
  17. 'id': '86D1CE8462-576CAAE416',
  18. 'ext': 'mp4',
  19. 'title': 'oceans.mp4',
  20. 'thumbnail': 're:^https?://.*\.jpg$',
  21. }
  22. }
  23. def _real_extract(self, url):
  24. video_id = self._match_id(url)
  25. webpage = self._download_webpage(url, video_id)
  26. if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
  27. raise ExtractorError('Video %s does not exist' % video_id,
  28. expected=True)
  29. chash_pattern = r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)'
  30. chash = self._html_search_regex(chash_pattern, webpage, "chash")
  31. fields = self._hidden_inputs(webpage)
  32. k = list(fields)[0]
  33. fields[k] = chash + fields[k]
  34. post = urlencode_postdata(fields)
  35. req = sanitized_Request(url, post)
  36. req.add_header('Content-type', 'application/x-www-form-urlencoded')
  37. webpage = self._download_webpage(
  38. req, video_id, 'Downloading video page')
  39. url_pattern = r'<a href="(http://www\.promptfile\.com/file/[^"]+)'
  40. url = self._html_search_regex(url_pattern, webpage, 'URL')
  41. title = self._html_search_regex(
  42. r'<span.+title="([^"]+)">', webpage, 'title')
  43. thumbnail = self._html_search_regex(
  44. r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
  45. webpage, 'thumbnail', fatal=False, flags=re.DOTALL)
  46. formats = [{
  47. 'format_id': 'sd',
  48. 'url': url,
  49. 'ext': determine_ext(title),
  50. }]
  51. self._sort_formats(formats)
  52. return {
  53. 'id': video_id,
  54. 'title': title,
  55. 'thumbnail': thumbnail,
  56. 'formats': formats,
  57. }