dump.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. )
  8. class DumpIE(InfoExtractor):
  9. _VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'
  10. def _real_extract(self, url):
  11. m = re.match(self._VALID_URL, url)
  12. video_id = m.group('id')
  13. # Note: There is an easier-to-parse configuration at
  14. # http://www.aparat.com/video/video/config/videohash/%video_id
  15. # but the URL in there does not work
  16. webpage = self._download_webpage(url, video_id)
  17. try:
  18. video_url = re.findall(r'file","(.+?.flv)"', webpage)[-1]
  19. except IndexError:
  20. raise ExtractorError(u'No video URL found')
  21. thumb = re.findall('<meta property="og:image" content="(.+?)"',webpage)[0]
  22. title = self._search_regex(r'<b>([^"]+)</b>', webpage, u'title')
  23. return {
  24. 'id': video_id,
  25. 'title': title,
  26. 'url': video_url,
  27. 'ext': 'flv',
  28. 'thumbnail': thumb,
  29. }