vbox7.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..compat import compat_urlparse
  5. from ..utils import (
  6. ExtractorError,
  7. sanitized_Request,
  8. urlencode_postdata,
  9. )
  10. class Vbox7IE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
  12. _TESTS = [{
  13. 'url': 'http://vbox7.com/play:0946fff23c',
  14. 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
  15. 'info_dict': {
  16. 'id': '0946fff23c',
  17. 'ext': 'mp4',
  18. 'title': 'Борисов: Притеснен съм за бъдещето на България',
  19. },
  20. }, {
  21. 'url': 'http://vbox7.com/play:249bb972c2',
  22. 'md5': '99f65c0c9ef9b682b97313e052734c3f',
  23. 'info_dict': {
  24. 'id': '249bb972c2',
  25. 'ext': 'mp4',
  26. 'title': 'Смях! Чудо - чист за секунди - Скрита камера',
  27. },
  28. 'skip': 'georestricted',
  29. }]
  30. def _real_extract(self, url):
  31. video_id = self._match_id(url)
  32. webpage = self._download_webpage(url, video_id)
  33. title = self._html_search_regex(
  34. r'<title>(.*)</title>', webpage, 'title').split('/')[0].strip()
  35. video_url = self._search_regex(
  36. r'src\s*:\s*(["\'])(?P<url>.+?.mp4.*?)\1',
  37. webpage, 'video url', default=None, group='url')
  38. thumbnail_url = self._og_search_thumbnail(webpage)
  39. if not video_url:
  40. info_response = self._download_webpage(
  41. 'http://vbox7.com/play/magare.do', video_id,
  42. 'Downloading info webpage',
  43. data=urlencode_postdata({'as3': '1', 'vid': video_id}),
  44. headers={'Content-Type': 'application/x-www-form-urlencoded'})
  45. final_url, thumbnail_url = map(
  46. lambda x: x.split('=')[1], info_response.split('&'))
  47. if '/na.mp4' in video_url:
  48. self.raise_geo_restricted()
  49. return {
  50. 'id': video_id,
  51. 'url': self._proto_relative_url(video_url, 'http:'),
  52. 'title': title,
  53. 'thumbnail': thumbnail_url,
  54. }