hypestat.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import os.path
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_urllib_parse,
  8. compat_urllib_request,
  9. )
  10. class HypestatIE(InfoExtractor):
  11. IE_DESC = 'allmyvideos.net and vidspot.net'
  12. _VALID_URL = r'https?://(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'
  13. _TESTS = [{
  14. 'url': 'http://allmyvideos.net/jih3nce3x6wn',
  15. 'md5': '710883dee1bfc370ecf9fa6a89307c88',
  16. 'info_dict': {
  17. 'id': 'jih3nce3x6wn',
  18. 'ext': 'mp4',
  19. 'title': 'youtube-dl test video',
  20. },
  21. }, {
  22. 'url': 'http://vidspot.net/l2ngsmhs8ci5',
  23. 'md5': '710883dee1bfc370ecf9fa6a89307c88',
  24. 'info_dict': {
  25. 'id': 'l2ngsmhs8ci5',
  26. 'ext': 'mp4',
  27. 'title': 'youtube-dl test video',
  28. },
  29. }]
  30. def _real_extract(self, url):
  31. mobj = re.match(self._VALID_URL, url)
  32. video_id = mobj.group('id')
  33. orig_webpage = self._download_webpage(url, video_id)
  34. fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
  35. data = dict(fields)
  36. post = compat_urllib_parse.urlencode(data)
  37. headers = {
  38. b'Content-Type': b'application/x-www-form-urlencoded',
  39. }
  40. req = compat_urllib_request.Request(url, post, headers)
  41. webpage = self._download_webpage(
  42. req, video_id, note='Downloading video page ...')
  43. title = os.path.splitext(data['fname'])[0]
  44. #Could be several links with different quality
  45. links = re.findall(r'"file" : "?(.+?)",', webpage)
  46. # Assume the links are ordered in quality
  47. formats = [{
  48. 'url': l,
  49. 'quality': i,
  50. } for i, l in enumerate(links)]
  51. self._sort_formats(formats)
  52. return {
  53. 'id': video_id,
  54. 'title': title,
  55. 'formats': formats,
  56. }