npr.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import os.path
  4. import re
  5. from ..compat import compat_urllib_parse_unquote
  6. from ..utils import url_basename
  7. from .common import InfoExtractor
  8. class NprIE(InfoExtractor):
  9. _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer.html?.*id=(?P<id>[0-9]+)'
  10. _TEST = {
  11. 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',
  12. 'info_dict': {
  13. 'id': '449974205',
  14. 'ext': 'mp4',
  15. 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
  16. }
  17. }
  18. def _real_extract(self, url):
  19. mobj = re.match(self._VALID_URL, url)
  20. video_id = mobj.group('id')
  21. webpage_url = 'http://www.npr.org/player/v2/mediaPlayer.html?id=' + video_id
  22. webpage = self._download_webpage(webpage_url, video_id)
  23. key = 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010'
  24. xml_url = 'http://api.npr.org/query?id=%s&apiKey=%s' % (video_id, key)
  25. json_url = 'http://api.npr.org/query?id=%s&apiKey=%s&format=json' % (video_id, key)
  26. formats = []
  27. entries = []
  28. config = self._download_json(json_url, video_id)
  29. content = config["list"]["story"]
  30. album_title = config["list"]["story"][0]['song'][0]['album']['albumTitle']
  31. print album_title['$text']
  32. for key in content:
  33. if "audio" in key:
  34. for x in key['audio']:
  35. if x['type'] == 'standard':
  36. playlist = True
  37. song_duration = x["duration"]['$text']
  38. song_title = x["title"]["$text"]
  39. song_id = x["id"]
  40. for k in x["format"]:
  41. if type(x["format"][k]) is list:
  42. for z in x["format"][k]:
  43. formats.append({ 'format': z['type'],
  44. 'url' : z['$text']
  45. })
  46. else:
  47. formats.append({ 'format': k,
  48. 'url' : x["format"][k]['$text']
  49. })
  50. entries.append({ "title":song_title,
  51. "id":song_id,
  52. "duration": song_duration ,
  53. "formats":formats})
  54. formats = []
  55. return { '_type': 'playlist',
  56. 'id' : video_id,
  57. 'title' : album_title,
  58. 'entries': entries }