wat.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. # coding: utf-8
  2. import json
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. compat_urllib_parse,
  7. )
  8. class WatIE(InfoExtractor):
  9. _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
  10. IE_NAME = 'wat.tv'
  11. _TEST = {
  12. u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
  13. u'file': u'6bv55.mp4',
  14. u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
  15. u'info_dict': {
  16. u"title": u"World War Z - Philadelphia VOST"
  17. }
  18. }
  19. def _real_extract(self, url):
  20. mobj = re.match(self._VALID_URL, url)
  21. short_id = mobj.group('shortID')
  22. player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
  23. 'html5': '1'})
  24. player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
  25. short_id, u'Downloading player info')
  26. player = json.loads(player_info)['player']
  27. html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
  28. 'html5 player')
  29. player_webpage = self._download_webpage(html5_player, short_id,
  30. u'Downloading player webpage')
  31. video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
  32. 'video url')
  33. title = self._search_regex(r'contentTitle : "(.*?)"', player_webpage,
  34. 'title')
  35. thumbnail = self._search_regex(r'previewMedia : "(.*?)"', player_webpage,
  36. 'thumbnail')
  37. return {'id': short_id,
  38. 'url': video_url,
  39. 'ext': 'mp4',
  40. 'title': title,
  41. 'thumbnail': thumbnail,
  42. }
  43. class TF1IE(InfoExtractor):
  44. """
  45. TF1 uses the wat.tv player, currently it can only download videos with the
  46. html5 player enabled, it cannot download HD videos or the news.
  47. """
  48. _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
  49. _TEST = {
  50. u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
  51. u'file': u'6bysb.mp4',
  52. u'md5': u'66789d3e91278d332f75e1feb7aea327',
  53. u'info_dict': {
  54. u"title": u"Citroën Grand C4 Picasso 2013 : présentation officielle"
  55. }
  56. }
  57. def _real_extract(self, url):
  58. mobj = re.match(self._VALID_URL, url)
  59. id = mobj.group(1)
  60. webpage = self._download_webpage(url, id)
  61. embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
  62. webpage, 'embed url')
  63. embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
  64. wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
  65. wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
  66. wat_info = json.loads(wat_info)['media']
  67. wat_url = wat_info['url']
  68. return self.url_result(wat_url, 'Wat')