drtuber.py 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. class DrTuberIE(InfoExtractor):
  5. _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<title_dash>[\w-]+)'
  6. _TEST = {
  7. 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
  8. 'md5': '93e680cf2536ad0dfb7e74d94a89facd',
  9. 'info_dict': {
  10. 'id': '1740434',
  11. 'ext': 'mp4',
  12. 'title': 'Hot Perky Blonde Naked Golf',
  13. 'categories': list, # NSFW
  14. 'thumbnail': 're:https?://.*\.jpg$',
  15. }
  16. }
  17. def _real_extract(self, url):
  18. mobj = re.match(self._VALID_URL, url)
  19. video_id = mobj.group('id')
  20. webpage = self._download_webpage(url, video_id)
  21. video_url = self._html_search_regex(
  22. r'<source src="([^"]+)"', webpage, 'video URL')
  23. title = self._html_search_regex(
  24. r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
  25. thumbnail = self._html_search_regex(
  26. r'poster="([^"]+)"',
  27. webpage, 'thumbnail', fatal=False)
  28. categories_str = self._html_search_regex(
  29. r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
  30. categories = categories_str.split(' ')
  31. return {
  32. 'id': video_id,
  33. 'url': video_url,
  34. 'title': title,
  35. 'thumbnail': thumbnail,
  36. 'categories': categories,
  37. }