moniker.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import os.path
  4. import re
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_urllib_parse,
  8. compat_urllib_request,
  9. )
  10. from ..utils import (
  11. ExtractorError,
  12. remove_start,
  13. )
  14. class MonikerIE(InfoExtractor):
  15. IE_DESC = 'allmyvideos.net and vidspot.net'
  16. _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'
  17. _TESTS = [{
  18. 'url': 'http://allmyvideos.net/jih3nce3x6wn',
  19. 'md5': '710883dee1bfc370ecf9fa6a89307c88',
  20. 'info_dict': {
  21. 'id': 'jih3nce3x6wn',
  22. 'ext': 'mp4',
  23. 'title': 'youtube-dl test video',
  24. },
  25. }, {
  26. 'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
  27. 'md5': '710883dee1bfc370ecf9fa6a89307c88',
  28. 'info_dict': {
  29. 'id': 'jih3nce3x6wn',
  30. 'ext': 'mp4',
  31. 'title': 'youtube-dl test video',
  32. },
  33. }, {
  34. 'url': 'http://vidspot.net/l2ngsmhs8ci5',
  35. 'md5': '710883dee1bfc370ecf9fa6a89307c88',
  36. 'info_dict': {
  37. 'id': 'l2ngsmhs8ci5',
  38. 'ext': 'mp4',
  39. 'title': 'youtube-dl test video',
  40. },
  41. }, {
  42. 'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
  43. 'only_matching': True,
  44. }]
  45. def _real_extract(self, url):
  46. orig_video_id = self._match_id(url)
  47. video_id = remove_start(orig_video_id, 'embed-')
  48. url = url.replace(orig_video_id, video_id)
  49. assert re.match(self._VALID_URL, url) is not None
  50. orig_webpage = self._download_webpage(url, video_id)
  51. if '>File Not Found<' in orig_webpage:
  52. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  53. error = self._search_regex(
  54. r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
  55. if error:
  56. raise ExtractorError(
  57. '%s returned error: %s' % (self.IE_NAME, error), expected=True)
  58. builtin_url = self._search_regex(
  59. r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
  60. orig_webpage, 'builtin URL', default=None, group='url')
  61. if builtin_url:
  62. req = compat_urllib_request.Request(builtin_url)
  63. req.add_header('Referer', url)
  64. webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
  65. title = self._og_search_title(orig_webpage).strip()
  66. description = self._og_search_description(orig_webpage).strip()
  67. else:
  68. fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
  69. data = dict(fields)
  70. post = compat_urllib_parse.urlencode(data)
  71. headers = {
  72. b'Content-Type': b'application/x-www-form-urlencoded',
  73. }
  74. req = compat_urllib_request.Request(url, post, headers)
  75. webpage = self._download_webpage(
  76. req, video_id, note='Downloading video page ...')
  77. title = os.path.splitext(data['fname'])[0]
  78. description = None
  79. # Could be several links with different quality
  80. links = re.findall(r'"file" : "?(.+?)",', webpage)
  81. # Assume the links are ordered in quality
  82. formats = [{
  83. 'url': l,
  84. 'quality': i,
  85. } for i, l in enumerate(links)]
  86. self._sort_formats(formats)
  87. return {
  88. 'id': video_id,
  89. 'title': title,
  90. 'description': description,
  91. 'formats': formats,
  92. }