123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247 |
- var format = require('url').format;
- var querystring = require('querystring');
- var JStream = require('jstream');
- var sax = require('sax');
- var request = require('./request');
- var util = require('./util');
- var sig = require('./sig');
- var FORMATS = require('./formats');
// Base URL of the regular watch page; the video id is appended to it.
var VIDEO_URL = 'https://www.youtube.com/watch?v=';

// Base URL of the embed page, requested when the watch page has no
// inline player config.
var EMBED_URL = 'https://www.youtube.com/embed/';

// Base of the value sent as the `eurl` query parameter of the
// get_video_info request.
var VIDEO_EURL = 'https://youtube.googleapis.com/v/';

// Host and path of the get_video_info endpoint.
var INFO_HOST = 'www.youtube.com';
var INFO_PATH = '/get_video_info';

// Keys of the video info object whose comma separated string values are
// turned into arrays. Frozen because the list is shared module state and
// is only ever read.
var KEYS_TO_SPLIT = Object.freeze([
  'keywords',
  'fmt_list',
  'fexp',
  'watermark'
]);
- /**
- * Gets info from a video.
- *
- * @param {String} link
- * @param {Object} opts
- * @param {Function(Error, Object)} callback
- */
- module.exports = function getInfo(link, opts, callback) {
- if (typeof opts === 'function') {
- callback = opts;
- opts = {};
- } else if (!opts) {
- opts = {};
- }
- var id = util.getVideoID(link);
- var url = VIDEO_URL + id;
- request(url, function(err, body) {
- if (err) return callback(err);
- // Get description from #eow-description
- var description = util.getVideoDescription(body);
- var jsonStr = util.between(body, 'ytplayer.config = ', '</script>');
- if (jsonStr) {
- var config = parseJSON(jsonStr);
- if (!config) {
- return callback(new Error('could not parse video page config'));
- }
- gotConfig(opts, description, config, callback);
- } else {
- url = EMBED_URL + id;
- request(url, function(err, body) {
- if (err) return callback(err);
- jsonStr = util.between(body,
- 'yt.setConfig(\'PLAYER_CONFIG\', ', '</script>');
- if (!jsonStr) {
- return callback(new Error('could not find `player config`'));
- }
- var config = parseJSON(jsonStr);
- if (!config) {
- return callback(new Error('could not parse embed page config'));
- }
- url = format({
- protocol: 'https',
- host: INFO_HOST,
- pathname: INFO_PATH,
- query: {
- video_id: id,
- eurl: VIDEO_EURL + id,
- sts: config.sts || '',
- },
- });
- request(url, function(err, body) {
- if (err) return callback(err);
- config.args = querystring.parse(body);
- gotConfig(opts, description, config, callback);
- });
- });
- }
- });
- };
/**
 * Extracts the first JSON value found at the start of `body`.
 *
 * The caller knows where the JSON text begins but not where it ends, so
 * the whole remainder is fed to a JStream and the first value it emits
 * is kept. The stream is paused as soon as that value arrives.
 *
 * @param {String} body
 * @return {Object} The first parsed value, or `null` when nothing was
 *   emitted.
 */
function parseJSON(body) {
  var parsed = null;
  var stream = new JStream();

  // Errors are deliberately ignored: on failure `parsed` stays `null`,
  // which callers already check for.
  stream.on('error', function() {});

  stream.on('data', function(value) {
    parsed = value;
    stream.pause();
  });

  stream.end(body);
  return parsed;
}
/**
 * Splits a comma separated value into its non-empty parts.
 *
 * Arrays are accepted as well (`querystring.parse` produces them for
 * repeated keys); their elements are joined before splitting.
 *
 * @param {String|Array.<String>} value
 * @return {Array.<String>}
 */
function splitCommaList(value) {
  var text = Array.isArray(value) ? value.join(',') : String(value);
  return text.split(',').filter(function(part) { return part !== ''; });
}

/**
 * Merges formats from the dash manifest into formats from the video info.
 * For an itag present in both, the existing format only receives the
 * manifest properties for which it has no truthy value of its own.
 *
 * @param {Array.<Object>} formats
 * @param {Array.<Object>} dashFormats
 * @return {Array.<Object>} One entry per itag, unsorted.
 */
function mergeDashFormats(formats, dashFormats) {
  var byItag = {};
  formats.forEach(function(f) { byItag[f.itag] = f; });
  dashFormats.forEach(function(df) {
    var existing = byItag[df.itag];
    if (!existing) {
      byItag[df.itag] = df;
      return;
    }
    for (var key in df) {
      if (!existing[key]) { existing[key] = df[key]; }
    }
  });
  return Object.keys(byItag).map(function(itag) { return byItag[itag]; });
}

/**
 * Turns a player config into the final info object and passes it to
 * `callback`.
 *
 * `config.args` is normalised in place: the keys in `KEYS_TO_SPLIT`
 * become arrays, `fmt_list` entries are split on `/`, `video_verticals`
 * becomes an array of integers, and `formats` and `description` are
 * attached. When `dashmpd` is present, its `/s/<sig>` segment is
 * deciphered and the formats listed in the manifest are merged in.
 *
 * Every failure, including a config without `args` or `assets`, is
 * reported through `callback` rather than thrown.
 *
 * @param {Object} opts Only `opts.debug` is read.
 * @param {String} description
 * @param {Object} config
 * @param {Function(Error, Object)} callback
 */
function gotConfig(opts, description, config, callback) {
  var info = config && config.args;
  if (!info) {
    callback(new Error('could not find video info in player config'));
    return;
  }

  if (info.status === 'fail') {
    var msg = info.errorcode && info.reason ?
      'Code ' + info.errorcode + ': ' + info.reason : 'Video not found';
    callback(new Error(msg));
    return;
  }

  // Split some keys by commas.
  KEYS_TO_SPLIT.forEach(function(key) {
    if (!info[key]) return;
    info[key] = splitCommaList(info[key]);
  });

  info.fmt_list = info.fmt_list ?
    info.fmt_list.map(function(format) {
      return format.split('/');
    }) : [];

  // The value looks like "[1, 2, 3]": strip the brackets, then parse.
  if (info.video_verticals && typeof info.video_verticals === 'string') {
    info.video_verticals = info.video_verticals
      .slice(1, -1)
      .split(', ')
      .filter(function(val) { return val !== ''; })
      .map(function(val) { return parseInt(val, 10); });
  }

  info.formats = util.parseFormats(info);
  if (!info.formats.length) {
    callback(new Error('Video not found'));
    return;
  }

  info.description = description;

  // The player script location is needed for signature handling below.
  if (!config.assets) {
    callback(new Error('could not find player assets in config'));
    return;
  }
  var playerJs = config.assets.js;

  if (!info.dashmpd) {
    sig.get(info.formats, playerJs, opts.debug, function(err) {
      if (err) return callback(err);
      info.formats.sort(util.sortFormats);
      callback(null, info);
    });
    return;
  }

  sig.getTokens(playerJs, opts.debug, function(err, tokens) {
    if (err) return callback(err);

    // Replace the `/s/<sig>` path segment with the deciphered signature.
    info.dashmpd = info.dashmpd
      .replace(/\/s\/([a-fA-F0-9\.]+)/, function(_, s) {
        return '/signature/' + sig.decipher(tokens, s);
      });

    getDashManifest(info.dashmpd, opts.debug, function(err, dformats) {
      if (err) return callback(err);

      sig.decipherFormats(info.formats, tokens, opts.debug);

      // Merge formats from dash manifest with formats from video info page.
      info.formats = mergeDashFormats(info.formats, dformats);
      info.formats.sort(util.sortFormats);
      callback(null, info);
    });
  });
}
/**
 * Gets additional DASH formats.
 *
 * Requests `url` and feeds the response to a strict sax parser. Every
 * `Representation` element becomes a format whose `itag` is the
 * element's `id` attribute, extended with the matching entry of
 * `FORMATS` when there is one. The text of a `BaseURL` element inside a
 * `Representation` becomes that format's `url`.
 *
 * `callback` is invoked exactly once: with the first request, response
 * or parse error, or with the collected formats when parsing ends. After
 * an error, no more data is written to the parser.
 *
 * @param {String} url
 * @param {Boolean} debug When truthy, itags without metadata in
 *   `FORMATS` are reported with `console.warn`.
 * @param {Function(!Error, Array.<Object>)} callback
 */
function getDashManifest(url, debug, callback) {
  var formats = [];
  var currentFormat = null;
  var expectUrl = false;
  var finished = false;

  // Several sources can fail or finish (request, response, parser); only
  // the first outcome is reported.
  function done(err, result) {
    if (finished) return;
    finished = true;
    if (err) {
      callback(err);
    } else {
      callback(null, result);
    }
  }

  function fail(err) {
    done(err || new Error('could not get dash manifest'));
  }

  var parser = sax.parser(true);
  parser.onerror = fail;

  parser.onopentag = function(node) {
    if (node.name === 'Representation') {
      var itag = node.attributes.id;
      var meta = FORMATS[itag];
      if (!meta && debug) {
        console.warn('No format metadata for itag ' + itag + ' found');
      }
      currentFormat = { itag: itag };
      for (var key in meta) {
        currentFormat[key] = meta[key];
      }
      formats.push(currentFormat);
    }
    expectUrl = node.name === 'BaseURL';
  };

  parser.ontext = function(text) {
    // A BaseURL outside of any Representation has no format to attach to.
    if (expectUrl && currentFormat) {
      currentFormat.url = text;
    }
  };

  parser.onclosetag = function(name) {
    // Text after a closing tag (e.g. whitespace following </BaseURL>) must
    // not overwrite the url that was just read.
    expectUrl = false;
    if (name === 'Representation') {
      currentFormat = null;
    }
  };

  parser.onend = function() { done(null, formats); };

  var req = request(url);
  req.on('error', fail);
  req.on('response', function(res) {
    res.setEncoding('utf8');
    res.on('error', fail);
    res.on('data', function(chunk) {
      // Once the parser has reported an error, further writes would throw.
      if (!finished) parser.write(chunk);
    });
    res.on('end', function() {
      if (!finished) parser.close();
    });
  });
}
|