tvp.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. # encoding: utf-8
  2. import re
  3. import json
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. determine_ext,
  7. ExtractorError,
  8. RegexNotFoundError,
  9. )
  10. class TvpIE(InfoExtractor):
  11. IE_NAME = u'tvp.pl'
  12. _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
  13. _INFO_URL = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s'
  14. _TEST = {
  15. u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
  16. u'file': u'31.10.2013-12878238.wmv',
  17. u'info_dict': {
  18. u'title': u'31.10.2013',
  19. u'description': u'31.10.2013',
  20. },
  21. }
  22. def _real_extract(self, url):
  23. mobj = re.match(self._VALID_URL, url)
  24. video_id = mobj.group('id')
  25. webpage = self._download_webpage(url, video_id, "Downloading video webpage")
  26. json_params = self._download_webpage(self._INFO_URL % video_id, video_id, "Downloading video metadata")
  27. try:
  28. params = json.loads(json_params)
  29. except:
  30. raise ExtractorError(u'Invalid JSON')
  31. self.report_extraction(video_id)
  32. try:
  33. video_url = params['video_url']
  34. except KeyError:
  35. raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
  36. try:
  37. title = self._og_search_title(webpage)
  38. except RegexNotFoundError:
  39. title = video_id
  40. info = {
  41. 'id': video_id,
  42. 'title': title,
  43. 'ext': 'wmv',
  44. 'url': video_url,
  45. }
  46. try:
  47. info['description'] = self._og_search_description(webpage)
  48. info['thumbnail'] = self._og_search_thumbnail(webpage)
  49. except RegexNotFoundError:
  50. pass
  51. return info