# breakcom.py (1.3 KB)
  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. from .common import InfoExtractor
  5. class BreakIE(InfoExtractor):
  6. _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
  7. _TESTS = [{
  8. 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
  9. 'md5': '33aa4ff477ecd124d18d7b5d23b87ce5',
  10. 'info_dict': {
  11. 'id': '2468056',
  12. 'ext': 'mp4',
  13. 'title': 'When Girls Act Like D-Bags',
  14. }
  15. }, {
  16. 'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
  17. 'only_matching': True,
  18. }]
  19. def _real_extract(self, url):
  20. video_id = self._match_id(url)
  21. webpage = self._download_webpage(
  22. 'http://www.break.com/embed/%s' % video_id, video_id)
  23. info = json.loads(self._search_regex(
  24. r'var embedVars = ({.*})\s*?</script>',
  25. webpage, 'info json', flags=re.DOTALL))
  26. video_url = info['videoUri']
  27. youtube_id = info.get('youtubeId')
  28. if youtube_id:
  29. return self.url_result(youtube_id, 'Youtube')
  30. final_url = video_url + '?' + info['AuthToken']
  31. return {
  32. 'id': video_id,
  33. 'url': final_url,
  34. 'title': info['contentName'],
  35. 'thumbnail': info['thumbUri'],
  36. }