cnbc.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. js_to_json,
  6. smuggle_url,
  7. )
  8. class CNBCIE(InfoExtractor):
  9. _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
  10. _TEST = {
  11. 'url': 'http://video.cnbc.com/gallery/?video=3000503714',
  12. 'info_dict': {
  13. 'id': '3000503714',
  14. 'ext': 'mp4',
  15. 'title': 'Fighting zombies is big business',
  16. 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
  17. 'timestamp': 1459332000,
  18. 'upload_date': '20160330',
  19. 'uploader': 'NBCU-CNBC',
  20. },
  21. 'params': {
  22. # m3u8 download
  23. 'skip_download': True,
  24. },
  25. }
  26. def _real_extract(self, url):
  27. video_id = self._match_id(url)
  28. return {
  29. '_type': 'url_transparent',
  30. 'ie_key': 'ThePlatform',
  31. 'url': smuggle_url(
  32. 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
  33. {'force_smil_url': True}),
  34. 'id': video_id,
  35. }
  36. class CNBCNewIE(InfoExtractor):
  37. IE_NAME = 'CNBC:new'
  38. _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P<id>[^.]+)'
  39. _TEST = {
  40. 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
  41. 'info_dict': {
  42. 'id': '7000031301',
  43. 'ext': 'mp4',
  44. 'title': 'Trump: I don\'t necessarily agree with raising rates',
  45. 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
  46. 'timestamp': 1531958400,
  47. 'upload_date': '20180719',
  48. 'uploader': 'NBCU-CNBC',
  49. },
  50. 'params': {
  51. # m3u8 download
  52. 'skip_download': True,
  53. },
  54. }
  55. CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s'
  56. def _real_extract(self, url):
  57. display_id = self._match_id(url)
  58. webpage = self._download_webpage(url, display_id)
  59. video_id = self._parse_json(
  60. self._search_regex(
  61. r'(?s).*<script[^>]*>.*?({.+?content_id.+?}).*?</script>',
  62. webpage, display_id),
  63. display_id, transform_source=js_to_json
  64. )['content_id']
  65. return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC')