vice.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from .ooyala import OoyalaIE
  5. from ..utils import ExtractorError
  6. class ViceIE(InfoExtractor):
  7. _VALID_URL = r'https?://(.+?\.)?vice\.com/.*?/(?P<name>.+)'
  8. _TESTS = [
  9. {
  10. 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
  11. 'info_dict': {
  12. 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
  13. 'ext': 'mp4',
  14. 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
  15. },
  16. 'params': {
  17. # Requires ffmpeg (m3u8 manifest)
  18. 'skip_download': True,
  19. },
  20. }, {
  21. 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
  22. 'info_dict': {
  23. 'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
  24. 'ext': 'mp4',
  25. 'title': 'VICE News - Inside the Monkey Lab',
  26. 'description': 'md5:1f660d467d3515f29d11e5ef742a4b82',
  27. },
  28. 'params': {
  29. # Requires ffmpeg (m3u8 manifest)
  30. 'skip_download': True,
  31. },
  32. }
  33. ]
  34. def _real_extract(self, url):
  35. mobj = re.match(self._VALID_URL, url)
  36. name = mobj.group('name')
  37. webpage = self._download_webpage(url, name)
  38. try:
  39. embed_code = self._search_regex(
  40. r'embedCode=([^&\'"]+)', webpage,
  41. 'ooyala embed code')
  42. ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
  43. except ExtractorError:
  44. raise ExtractorError('The page doesn\'t contain a video', expected=True)
  45. return self.url_result(ooyala_url, ie='Ooyala')