2
0

veoh.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_request,
    ExtractorError,
)
  6. class VeohIE(InfoExtractor):
  7. _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/v(?P<id>\d*)'
  8. _TEST = {
  9. 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
  10. 'file': '56314296.mp4',
  11. 'md5': '620e68e6a3cff80086df3348426c9ca3',
  12. 'info_dict': {
  13. 'title': 'Straight Backs Are Stronger',
  14. 'uploader': 'LUMOback',
  15. 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
  16. }
  17. }
  18. def _real_extract(self, url):
  19. mobj = re.match(self._VALID_URL, url)
  20. video_id = mobj.group('id')
  21. webpage = self._download_webpage(url, video_id)
  22. age_limit = 0
  23. if 'class="adultwarning-container"' in webpage:
  24. self.report_age_confirmation()
  25. age_limit = 18
  26. request = compat_urllib_request.Request(url)
  27. request.add_header('Cookie', 'confirmedAdult=true')
  28. webpage = self._download_webpage(request, video_id)
  29. m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
  30. if m_youtube is not None:
  31. youtube_id = m_youtube.group(1)
  32. self.to_screen('%s: detected Youtube video.' % video_id)
  33. return self.url_result(youtube_id, 'Youtube')
  34. self.report_extraction(video_id)
  35. info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
  36. info = json.loads(info)
  37. video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
  38. return {
  39. 'id': info['videoId'],
  40. 'title': info['title'],
  41. 'url': video_url,
  42. 'uploader': info['username'],
  43. 'thumbnail': info.get('highResImage') or info.get('medResImage'),
  44. 'description': info['description'],
  45. 'view_count': info['views'],
  46. 'age_limit': age_limit,
  47. }