vrak.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from .brightcove import BrightcoveNewIE
  6. class VrakIE(InfoExtractor):
  7. _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P<id>[0-9\.]+).*'
  8. _TEST = {
  9. 'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721',
  10. 'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948',
  11. 'info_dict': {
  12. 'id': '5231040869001',
  13. 'ext': 'mp4',
  14. 'title': 'Référendums américains, animés japonais et hooligans russes',
  15. 'upload_date': '20161201',
  16. 'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.',
  17. 'timestamp': 1480628425,
  18. 'uploader_id': '2890187628001',
  19. }
  20. }
  21. def _real_extract(self, url):
  22. url_id = self._match_id(url)
  23. webpage = self._download_webpage(url, url_id)
  24. result = {}
  25. result['title'] = self._html_search_regex(
  26. r'<h3 class="videoTitle">(.+?)</h3>', webpage, 'title')
  27. # Inspired from BrightcoveNewIE._extract_url()
  28. entries = []
  29. for account_id, player_id, _, video_id in re.findall(
  30. # account_id, player_id and embed from:
  31. # <div class="video-player [...]
  32. # data-publisher-id="2890187628001"
  33. # data-player-id="VkSnGw3cx"
  34. # video id is extracted from weird CMS Java/Javascript notation:
  35. # RW java.lang.String value = '5231040869001';
  36. # Need to use backtrack to pin to a ref since video is in grid
  37. # layout with others
  38. r'''(?sx)
  39. <div[^>]+
  40. data-publisher-id=["\'](\d+)["\']
  41. [^>]*
  42. data-player-id=["\']([^"\']+)["\']
  43. [^>]*
  44. refId&quot;:&quot;([^&]+)&quot;
  45. [^>]*
  46. >.*?
  47. </div>.*?
  48. RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\'
  49. [^>]*
  50. RW\ java\.lang\.String\ value\ =\ \'(\d+)\'
  51. ''', webpage):
  52. entries.append(
  53. 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
  54. % (account_id, player_id, 'default', video_id))
  55. if entries:
  56. result = self.url_result(entries[0], BrightcoveNewIE.ie_key())
  57. return result