rdsca.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. parse_iso8601,
  6. url_basename,
  7. )
  8. class RDScaIE(InfoExtractor):
  9. IE_NAME = 'RDS.ca'
  10. _VALID_URL = r'http://(?:www\.)?rds\.ca/videos/(?P<id>.*)'
  11. _TESTS = [{
  12. 'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
  13. 'info_dict': {
  14. "ext": "mp4",
  15. "title": "Fowler Jr. prend la direction de Jacksonville",
  16. "description": "Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ",
  17. "timestamp": 1430397346,
  18. }
  19. }]
  20. def _real_extract(self, url):
  21. video_id = url_basename(url)
  22. webpage = self._download_webpage(url, video_id)
  23. title = self._search_regex(
  24. r'<span itemprop="name"[^>]*>([^\n]*)</span>', webpage, 'video title', default=None)
  25. video_url = self._search_regex(
  26. r'<span itemprop="contentURL" content="([^"]+)"', webpage, 'video URL')
  27. upload_date = parse_iso8601(self._search_regex(
  28. r'<span itemprop="uploadDate" content="([^"]+)"', webpage, 'upload date', default=None))
  29. description = self._search_regex(
  30. r'<span itemprop="description"[^>]*>([^\n]*)</span>', webpage, 'description', default=None)
  31. thumbnail = self._search_regex(
  32. r'<span itemprop="thumbnailUrl" content="([^"]+)"', webpage, 'upload date', default=None)
  33. return {
  34. 'id': video_id,
  35. 'title': title,
  36. 'description': description,
  37. 'thumbnail': thumbnail,
  38. 'timestamp': upload_date,
  39. 'formats': [{
  40. 'url': video_url,
  41. }],
  42. }