cammodels.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from .common import ExtractorError
  4. import json
  5. import re
  6. from ..utils import int_or_none
  7. class CamModelsIE(InfoExtractor):
  8. _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)'
  9. _HEADERS = {
  10. 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
  11. # Needed because server doesn't return links to video URLs if a browser-like User-Agent is not used
  12. }
  13. def _real_extract(self, url):
  14. video_id = self._match_id(url)
  15. webpage = self._download_webpage(
  16. url,
  17. video_id,
  18. headers=self._HEADERS)
  19. manifest_url_root = self._html_search_regex(
  20. r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))',
  21. webpage,
  22. 'manifest',
  23. None,
  24. False)
  25. if not manifest_url_root:
  26. offline = self._html_search_regex(
  27. r'(?P<id>I\'m offline, but let\'s stay connected!)',
  28. webpage,
  29. 'offline indicator',
  30. None,
  31. False)
  32. private = self._html_search_regex(
  33. r'(?P<id>I’m in a private show right now)',
  34. webpage,
  35. 'private show indicator',
  36. None,
  37. False)
  38. err = 'This user is currently offline, so nothing can be downloaded.' if offline \
  39. else 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.' if private \
  40. else 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.'
  41. raise ExtractorError(
  42. err,
  43. expected=True if offline or private else False,
  44. video_id=video_id
  45. )
  46. manifest_url = manifest_url_root + video_id + '.json'
  47. manifest = self._download_json(
  48. manifest_url,
  49. video_id,
  50. 'Downloading links to streams.',
  51. 'Link to stream URLs was found, but we couldn\'t access it.',
  52. headers=self._HEADERS)
  53. try:
  54. formats = []
  55. for fmtName in ['mp4-rtmp', 'mp4-hls']:
  56. for encoding in manifest['formats'][fmtName]['encodings']:
  57. formats.append({
  58. 'ext': 'mp4',
  59. 'url': encoding['location'],
  60. 'width': int_or_none(encoding.get('videoWidth')),
  61. 'height': int_or_none(encoding.get('videoHeight')),
  62. 'vbr': int_or_none(encoding.get('videoKbps')),
  63. 'abr': int_or_none(encoding.get('audioKbps')),
  64. 'format_id': fmtName + str(encoding.get('videoWidth'))
  65. })
  66. # If they change the JSON format, then fallback to parsing out RTMP links via regex.
  67. except KeyError:
  68. manifest_json = json.dumps(manifest)
  69. manifest_links = re.finditer(
  70. r'(?P<id>rtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))',
  71. manifest_json)
  72. if not manifest_links:
  73. raise ExtractorError(
  74. 'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
  75. expected=False,
  76. video_id=video_id)
  77. formats = []
  78. for manifest_link in manifest_links:
  79. url = manifest_link.group('id')
  80. formats.append({
  81. 'ext': 'mp4',
  82. 'url': url,
  83. 'format_id': url.split(sep='/')[-1]
  84. })
  85. self._sort_formats(formats)
  86. return {
  87. 'id': video_id,
  88. 'title': self._live_title(video_id),
  89. 'formats': formats
  90. }