spankbang.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. orderedSet,
  7. parse_duration,
  8. parse_resolution,
  9. str_to_int,
  10. )
  11. class SpankBangIE(InfoExtractor):
  12. _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
  13. _TESTS = [{
  14. 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
  15. 'md5': '1cc433e1d6aa14bc376535b8679302f7',
  16. 'info_dict': {
  17. 'id': '3vvn',
  18. 'ext': 'mp4',
  19. 'title': 'fantasy solo',
  20. 'description': 'dillion harper masturbates on a bed',
  21. 'thumbnail': r're:^https?://.*\.jpg$',
  22. 'uploader': 'silly2587',
  23. 'age_limit': 18,
  24. }
  25. }, {
  26. # 480p only
  27. 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
  28. 'only_matching': True,
  29. }, {
  30. # no uploader
  31. 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
  32. 'only_matching': True,
  33. }, {
  34. # mobile page
  35. 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
  36. 'only_matching': True,
  37. }, {
  38. # 4k
  39. 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
  40. 'only_matching': True,
  41. }, {
  42. 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
  43. 'only_matching': True,
  44. }, {
  45. 'url': 'https://m.spankbang.com/3vvn/play',
  46. 'only_matching': True,
  47. }, {
  48. 'url': 'https://spankbang.com/2y3td/embed/',
  49. 'only_matching': True,
  50. }]
  51. def _real_extract(self, url):
  52. video_id = self._match_id(url)
  53. webpage = self._download_webpage(
  54. url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
  55. video_id, headers={'Cookie': 'country=US'})
  56. if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
  57. raise ExtractorError(
  58. 'Video %s is not available' % video_id, expected=True)
  59. formats = []
  60. for mobj in re.finditer(
  61. r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
  62. webpage):
  63. format_id, format_url = mobj.group('id', 'url')
  64. f = parse_resolution(format_id)
  65. f.update({
  66. 'url': format_url,
  67. 'format_id': format_id,
  68. })
  69. formats.append(f)
  70. self._sort_formats(formats)
  71. title = self._html_search_regex(
  72. r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
  73. description = self._search_regex(
  74. r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
  75. webpage, 'description', fatal=False)
  76. thumbnail = self._og_search_thumbnail(webpage)
  77. uploader = self._search_regex(
  78. r'class="user"[^>]*><img[^>]+>([^<]+)',
  79. webpage, 'uploader', default=None)
  80. duration = parse_duration(self._search_regex(
  81. r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
  82. webpage, 'duration', fatal=False))
  83. view_count = str_to_int(self._search_regex(
  84. r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
  85. age_limit = self._rta_search(webpage)
  86. return {
  87. 'id': video_id,
  88. 'title': title,
  89. 'description': description,
  90. 'thumbnail': thumbnail,
  91. 'uploader': uploader,
  92. 'duration': duration,
  93. 'view_count': view_count,
  94. 'formats': formats,
  95. 'age_limit': age_limit,
  96. }
  97. class SpankBangPlaylistIE(InfoExtractor):
  98. _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
  99. _TEST = {
  100. 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
  101. 'info_dict': {
  102. 'id': 'ug0k',
  103. 'title': 'Big Ass Titties',
  104. },
  105. 'playlist_mincount': 50,
  106. }
  107. def _real_extract(self, url):
  108. playlist_id = self._match_id(url)
  109. webpage = self._download_webpage(
  110. url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
  111. entries = [self.url_result(
  112. 'https://spankbang.com/%s/video' % video_id,
  113. ie=SpankBangIE.ie_key(), video_id=video_id)
  114. for video_id in orderedSet(re.findall(
  115. r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
  116. title = self._html_search_regex(
  117. r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
  118. fatal=False)
  119. return self.playlist_result(entries, playlist_id, title)