gorillavid.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  class GorillaVidIE(InfoExtractor):
      """Information extractor for videos hosted on gorillavid.in.

      Matches both the plain video page (``/<id>``) and the embed page
      (``/embed-<id>-<width>x<height>.html``).
      """

      # Dots are escaped so that they only match a literal '.' and not any
      # character (e.g. 'gorillavidXin' must not be accepted).
      _VALID_URL = (
          r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>\w+)'
          r'(?:-\d+x\d+)?(?:\.html)?'
      )

      _TEST = {
          'url': 'http://gorillavid.in/z08zf8le23c6',
          'md5': 'c9e293ca74d46cad638e199c3f3fe604',
          'info_dict': {
              'id': 'z08zf8le23c6',
              'ext': 'mp4',
              'title': 'Say something nice',
          },
      }

      def _real_extract(self, url):
          """Extract the id, title and media URL for a gorillavid.in video.

          :param url: page URL; expected to match ``_VALID_URL``.
          :returns: dict with the keys ``id``, ``title`` and ``url``.
          """
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')

          # The title is read from the ``fname`` form field of the video page.
          webpage = self._download_webpage(url, video_id)
          title = self._html_search_regex(
              r"name=['\"]fname['\"]\s+value=['\"](.*?)['\"]",
              webpage, 'video title')

          # Download the embed page again with cookies to get the media URL.
          embed_url = 'http://gorillavid.in/embed-{0}-960x480.html'.format(
              video_id)
          embed_page = self._download_webpage(
              embed_url, video_id,
              note='Downloading webpage again (with cookie)')

          # The third argument is the human-readable field name, so a
          # descriptive label is passed here rather than the page URL.
          video_url = self._html_search_regex(
              r'file:\s+["\'](http://.*?video\.\w{3})["\']',
              embed_page, 'video url')

          return {
              'id': video_id,
              'title': title,
              'url': video_url,
          }