# drtuber.py (1.5 KB)
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  class DrTuberIE(InfoExtractor):
      """Extractor for single-video pages on drtuber.com.

      The page HTML is downloaded once and every returned field is scraped
      from it with regular expressions.
      """

      # Accepts http/https, an optional "www." prefix, a numeric video id and a
      # dash-separated title slug.  Only the ``id`` group is read by this class.
      _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<title_dash>[\w-]+)'

      # Sample URL with the metadata expected for it.
      _TEST = {
          'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
          'md5': '93e680cf2536ad0dfb7e74d94a89facd',
          'info_dict': {
              'id': '1740434',
              'ext': 'mp4',
              'title': 'Hot Perky Blonde Naked Golf',
              'categories': list,  # NSFW
              # Raw string: "\." is not a valid escape in a normal literal and
              # triggers an invalid-escape warning on current Python versions.
              'thumbnail': r're:https?://.*\.jpg$',
              'age_limit': 18,
          }
      }

      def _real_extract(self, url):
          """Scrape the video page at *url* and return its metadata.

          Args:
              url: Page URL; expected to match ``_VALID_URL``.

          Returns:
              A dict with the keys ``id``, ``url``, ``title``, ``thumbnail``,
              ``categories`` and ``age_limit``.  ``categories`` is ``None``
              when the keywords lookup yields ``None``.
          """
          # NOTE(review): assumes the caller only passes URLs that match
          # _VALID_URL; otherwise ``mobj`` is None and the next line raises.
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')

          webpage = self._download_webpage(url, video_id)

          # Media URL taken from the first <source src="..."> attribute.
          video_url = self._html_search_regex(
              r'<source src="([^"]+)"', webpage, 'video URL')
          # Title is the part of <title> preceding the " - Free..." suffix.
          title = self._html_search_regex(
              r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
          # fatal=False: presumably yields None instead of raising when the
          # pattern is absent -- the None check on cats_str below relies on it.
          thumbnail = self._html_search_regex(
              r'poster="([^"]+)"',
              webpage, 'thumbnail', fatal=False)
          cats_str = self._html_search_regex(
              r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
          # The keywords string is split on single spaces.
          categories = None if cats_str is None else cats_str.split(' ')

          return {
              'id': video_id,
              'url': video_url,
              'title': title,
              'thumbnail': thumbnail,
              'categories': categories,
              # Age limit is whatever the inherited _rta_search helper reports
              # for this page.
              'age_limit': self._rta_search(webpage),
          }