info.js 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. var format = require('url').format;
  2. var querystring = require('querystring');
  3. var JStream = require('jstream');
  4. var sax = require('sax');
  5. var request = require('./request');
  6. var util = require('./util');
  7. var sig = require('./sig');
  8. var FORMATS = require('./formats');
  9. var VIDEO_URL = 'https://www.youtube.com/watch?v=';
  10. var EMBED_URL = 'https://www.youtube.com/embed/';
  11. var VIDEO_EURL = 'https://youtube.googleapis.com/v/';
  12. var INFO_HOST = 'www.youtube.com';
  13. var INFO_PATH = '/get_video_info';
  14. var KEYS_TO_SPLIT = [
  15. 'keywords',
  16. 'fmt_list',
  17. 'fexp',
  18. 'watermark'
  19. ];
  20. /**
  21. * Gets info from a video.
  22. *
  23. * @param {String} link
  24. * @param {Object} opts
  25. * @param {Function(Error, Object)} callback
  26. */
  27. module.exports = function getInfo(link, opts, callback) {
  28. if (typeof opts === 'function') {
  29. callback = opts;
  30. opts = {};
  31. } else if (!opts) {
  32. opts = {};
  33. }
  34. var id = util.getVideoID(link);
  35. var url = VIDEO_URL + id;
  36. request(url, function(err, body) {
  37. if (err) return callback(err);
  38. // Get description from #eow-description
  39. var description = util.getVideoDescription(body);
  40. var jsonStr = util.between(body, 'ytplayer.config = ', '</script>');
  41. if (jsonStr) {
  42. var config = parseJSON(jsonStr);
  43. if (!config) {
  44. return callback(new Error('could not parse video page config'));
  45. }
  46. gotConfig(opts, description, config, callback);
  47. } else {
  48. url = EMBED_URL + id;
  49. request(url, function(err, body) {
  50. if (err) return callback(err);
  51. jsonStr = util.between(body,
  52. 'yt.setConfig(\'PLAYER_CONFIG\', ', '</script>');
  53. if (!jsonStr) {
  54. return callback(new Error('could not find `player config`'));
  55. }
  56. var config = parseJSON(jsonStr);
  57. if (!config) {
  58. return callback(new Error('could not parse embed page config'));
  59. }
  60. url = format({
  61. protocol: 'https',
  62. host: INFO_HOST,
  63. pathname: INFO_PATH,
  64. query: {
  65. video_id: id,
  66. eurl: VIDEO_EURL + id,
  67. sts: config.sts || '',
  68. },
  69. });
  70. request(url, function(err, body) {
  71. if (err) return callback(err);
  72. config.args = querystring.parse(body);
  73. gotConfig(opts, description, config, callback);
  74. });
  75. });
  76. }
  77. });
  78. };
  79. /**
  80. * JStream is used because we know when the JSON string begins,
  81. * but not when it ends. So a string that contains it, and that may
  82. * contain a bunch of other things, is read until the first object
  83. * is completely parsed.
  84. *
  85. * @param {String} body
  86. * @return {Object}
  87. */
  88. function parseJSON(body) {
  89. var jstream = new JStream();
  90. var config = null;
  91. jstream.on('data', function(data) {
  92. config = data;
  93. jstream.pause();
  94. });
  95. // Suppress errors. If there is one, `config` won't be defined,
  96. // which is already checked.
  97. jstream.on('error', function() {});
  98. jstream.end(body);
  99. return config;
  100. }
  101. /**
  102. * @param {Object} opts
  103. * @param {String} description
  104. * @param {Object} config
  105. * @param {Function(Error, Object)} callback
  106. */
  107. function gotConfig(opts, description, config, callback) {
  108. var info = config.args;
  109. if (info.status === 'fail') {
  110. var msg = info.errorcode && info.reason ?
  111. 'Code ' + info.errorcode + ': ' + info.reason : 'Video not found';
  112. callback(new Error(msg));
  113. return;
  114. }
  115. // Split some keys by commas.
  116. KEYS_TO_SPLIT.forEach(function(key) {
  117. if (!info[key]) return;
  118. info[key] = info[key]
  119. .split(',')
  120. .filter(function(v) { return v !== ''; });
  121. });
  122. info.fmt_list = info.fmt_list ?
  123. info.fmt_list.map(function(format) {
  124. return format.split('/');
  125. }) : [];
  126. if (info.video_verticals) {
  127. info.video_verticals = info.video_verticals
  128. .slice(1, -1)
  129. .split(', ')
  130. .filter(function(val) { return val !== ''; })
  131. .map(function(val) { return parseInt(val, 10); })
  132. ;
  133. }
  134. info.formats = util.parseFormats(info);
  135. if (!info.formats.length) {
  136. callback(new Error('Video not found'));
  137. return;
  138. }
  139. info.description = description;
  140. if (info.dashmpd) {
  141. sig.getTokens(config.assets.js, opts.debug, function(err, tokens) {
  142. if (err) return callback(err);
  143. info.dashmpd = info.dashmpd
  144. .replace(/\/s\/([a-fA-F0-9\.]+)/, function(_, s) {
  145. return '/signature/' + sig.decipher(tokens, s);
  146. });
  147. getDashManifest(info.dashmpd, opts.debug, function(err, dformats) {
  148. if (err) return callback(err);
  149. sig.decipherFormats(info.formats, tokens, opts.debug);
  150. var formatsMap = {};
  151. info.formats.forEach(function(f) { formatsMap[f.itag] = f; });
  152. // Merge formats from dash manifest with formats from video info page.
  153. dformats.forEach(function(f) {
  154. var cf = formatsMap[f.itag];
  155. if (cf) {
  156. for (var key in f) { if (!cf[key]) { cf[key] = f[key]; } }
  157. } else {
  158. formatsMap[f.itag] = f;
  159. }
  160. });
  161. info.formats = [];
  162. for (var itag in formatsMap) { info.formats.push(formatsMap[itag]); }
  163. info.formats.sort(util.sortFormats);
  164. callback(null, info);
  165. });
  166. });
  167. } else {
  168. sig.get(info.formats, config.assets.js, opts.debug, function(err) {
  169. if (err) return callback(err);
  170. info.formats.sort(util.sortFormats);
  171. callback(null, info);
  172. });
  173. }
  174. }
  175. /**
  176. * Gets additional DASH formats.
  177. *
  178. * @param {String} url
  179. * @param {Boolean} debug
  180. * @param {Function(!Error, Array.<Object>)} callback
  181. */
  182. function getDashManifest(url, debug, callback) {
  183. var formats = [];
  184. var currentFormat = null;
  185. var expectUrl = false;
  186. var parser = sax.parser(true);
  187. parser.onerror = callback;
  188. parser.onopentag = function(node) {
  189. if (node.name === 'Representation') {
  190. var itag = node.attributes.id;
  191. var meta = FORMATS[itag];
  192. if (!meta && debug) {
  193. console.warn('No format metadata for itag ' + itag + ' found');
  194. }
  195. currentFormat = { itag: itag };
  196. for (var key in meta) {
  197. currentFormat[key] = meta[key];
  198. }
  199. formats.push(currentFormat);
  200. }
  201. expectUrl = node.name === 'BaseURL';
  202. };
  203. parser.ontext = function(text) {
  204. if (expectUrl) {
  205. currentFormat.url = text;
  206. }
  207. };
  208. parser.onend = function() { callback(null, formats); };
  209. var req = request(url);
  210. req.on('error', callback);
  211. req.on('response', function(res) {
  212. res.setEncoding('utf8');
  213. res.on('error', callback);
  214. res.on('data', function(chunk) { parser.write(chunk); });
  215. res.on('end', parser.close.bind(parser));
  216. });
  217. }