浏览代码

parse portable infoboxes

Markus-Rost 4 年之前
父节点
当前提交
d3ab8e8830
共有 3 个文件被更改,包括 128 次插入和 21 次删除
  1. 40 4
      cmds/wiki/general.js
  2. 14 2
      cmds/wiki/random.js
  3. 74 15
      util/functions.js

+ 40 - 4
cmds/wiki/general.js

@@ -1,6 +1,6 @@
 const {MessageEmbed} = require('discord.js');
 const parse_page = require('../../functions/parse_page.js');
-const {htmlToPlain, htmlToDiscord} = require('../../util/functions.js');
+const {parse_infobox, htmlToPlain, htmlToDiscord} = require('../../util/functions.js');
 const extract_desc = require('../../util/extract_desc.js');
 const {limit: {interwiki: interwikiLimit}, wikiProjects} = require('../../util/default.json');
 const Wiki = require('../../util/wiki.js');
@@ -85,7 +85,7 @@ function gamepedia_check_wiki(lang, msg, title, wiki, cmd, reaction, spoiler = '
 		return fn.diff(lang, msg, args, wiki, reaction, spoiler);
 	}
 	var noRedirect = ( querystring.getAll('redirect').pop() === 'no' || ( querystring.has('action') && querystring.getAll('action').pop() !== 'view' ) );
-	got.get( wiki + 'api.php?action=query&meta=siteinfo&siprop=general|namespaces|specialpagealiases&iwurl=true' + ( noRedirect ? '' : '&redirects=true' ) + '&prop=pageimages|categoryinfo|pageprops|extracts&piprop=original|name&ppprop=description|displaytitle|page_image_free&explaintext=true&exsectionformat=raw&exlimit=1&converttitles=true&titles=%1F' + encodeURIComponent( title.replace( /\x1F/g, '\ufffd' ) ) + '&format=json' ).then( response => {
+	got.get( wiki + 'api.php?action=query&meta=siteinfo&siprop=general|namespaces|specialpagealiases&iwurl=true' + ( noRedirect ? '' : '&redirects=true' ) + '&prop=pageimages|categoryinfo|pageprops|extracts&piprop=original|name&ppprop=description|displaytitle|page_image_free|infoboxes&explaintext=true&exsectionformat=raw&exlimit=1&converttitles=true&titles=%1F' + encodeURIComponent( title.replace( /\x1F/g, '\ufffd' ) ) + '&format=json' ).then( response => {
 		var body = response.body;
 		if ( body && body.warnings ) log_warn(body.warnings);
 		if ( response.statusCode !== 200 || !body || body.batchcomplete === undefined || !body.query ) {
@@ -181,7 +181,7 @@ function gamepedia_check_wiki(lang, msg, title, wiki, cmd, reaction, spoiler = '
 				if ( reaction ) reaction.removeEmoji();
 			}
 			else if ( ( querypage.missing !== undefined && querypage.known === undefined && !( noRedirect || querypage.categoryinfo ) ) || querypage.invalid !== undefined ) {
-				got.get( wiki + 'api.php?action=query&prop=pageimages|categoryinfo|pageprops|extracts&piprop=original|name&ppprop=description|displaytitle|page_image_free&explaintext=true&exsectionformat=raw&exlimit=1&generator=search&gsrnamespace=4|12|14|' + Object.values(body.query.namespaces).filter( ns => ns.content !== undefined ).map( ns => ns.id ).join('|') + '&gsrlimit=1&gsrsearch=' + encodeURIComponent( title ) + '&format=json' ).then( srresponse => {
+				got.get( wiki + 'api.php?action=query&prop=pageimages|categoryinfo|pageprops|extracts&piprop=original|name&ppprop=description|displaytitle|page_image_free|infoboxes&explaintext=true&exsectionformat=raw&exlimit=1&generator=search&gsrnamespace=4|12|14|' + Object.values(body.query.namespaces).filter( ns => ns.content !== undefined ).map( ns => ns.id ).join('|') + '&gsrlimit=1&gsrsearch=' + encodeURIComponent( title ) + '&format=json' ).then( srresponse => {
 					var srbody = srresponse.body;
 					if ( srbody && srbody.warnings ) log_warn(srbody.warnings);
 					if ( srresponse.statusCode !== 200 || !srbody || srbody.batchcomplete === undefined ) {
@@ -257,6 +257,18 @@ function gamepedia_check_wiki(lang, msg, title, wiki, cmd, reaction, spoiler = '
 								if ( msg.showEmbed() ) embed.addField( category[0], category.slice(1).join('\n') );
 								else text += '\n\n' + category.join('\n');
 							}
+
+							if ( !embed.fields.length && querypage.pageprops && querypage.pageprops.infoboxes ) {
+								try {
+									var infobox = JSON.parse(querypage.pageprops.infoboxes)?.[0];
+									if ( infobox?.parser_tag_version === 2 ) infobox.data.forEach( group => {
+										parse_infobox(group, embed, new URL(body.query.general.logo, wiki).href);
+									} );
+								}
+								catch ( error ) {
+									console.log( '- Failed to parse the infobox: ' + error );
+								}
+							}
 				
 							msg.sendChannel( spoiler + '<' + pagelink + '>' + text + spoiler, {embed} ).then( message => parse_page(message, querypage.title, embed, wiki, ( querypage.title === body.query.general.mainpage ? '' : new URL(body.query.general.logo, wiki).href )) );
 						}
@@ -337,6 +349,18 @@ function gamepedia_check_wiki(lang, msg, title, wiki, cmd, reaction, spoiler = '
 					if ( msg.showEmbed() ) embed.addField( category[0], category.slice(1).join('\n') );
 					else text += '\n\n' + category.join('\n');
 				}
+
+				if ( !embed.fields.length && querypage.pageprops && querypage.pageprops.infoboxes ) {
+					try {
+						var infobox = JSON.parse(querypage.pageprops.infoboxes)?.[0];
+						if ( infobox?.parser_tag_version === 2 ) infobox.data.forEach( group => {
+							parse_infobox(group, embed, new URL(body.query.general.logo, wiki).href);
+						} );
+					}
+					catch ( error ) {
+						console.log( '- Failed to parse the infobox: ' + error );
+					}
+				}
 				
 				msg.sendChannel( spoiler + '<' + pagelink + '>' + text + spoiler, {embed} ).then( message => parse_page(message, querypage.title, embed, wiki, ( querypage.title === body.query.general.mainpage ? '' : new URL(body.query.general.logo, wiki).href )) );
 				
@@ -390,7 +414,7 @@ function gamepedia_check_wiki(lang, msg, title, wiki, cmd, reaction, spoiler = '
 		else {
 			var pagelink = wiki.toLink(body.query.general.mainpage, querystring, fragment);
 			var embed = new MessageEmbed().setAuthor( body.query.general.sitename ).setTitle( body.query.general.mainpage.escapeFormatting() ).setURL( pagelink ).setThumbnail( new URL(body.query.general.logo, wiki).href );
-			got.get( wiki + 'api.php?action=query' + ( noRedirect ? '' : '&redirects=true' ) + '&prop=pageprops|extracts&ppprop=description|displaytitle&explaintext=true&exsectionformat=raw&exlimit=1&titles=' + encodeURIComponent( body.query.general.mainpage ) + '&format=json' ).then( mpresponse => {
+			got.get( wiki + 'api.php?action=query' + ( noRedirect ? '' : '&redirects=true' ) + '&prop=pageprops|extracts&ppprop=description|displaytitle|infoboxes&explaintext=true&exsectionformat=raw&exlimit=1&titles=' + encodeURIComponent( body.query.general.mainpage ) + '&format=json' ).then( mpresponse => {
 				var mpbody = mpresponse.body;
 				if ( mpbody && mpbody.warnings ) log_warn(body.warnings);
 				if ( mpresponse.statusCode !== 200 || !mpbody || mpbody.batchcomplete === undefined || !mpbody.query ) {
@@ -413,6 +437,18 @@ function gamepedia_check_wiki(lang, msg, title, wiki, cmd, reaction, spoiler = '
 						embed.setDescription( description );
 					}
 				}
+				
+				if ( !embed.fields.length && querypage.pageprops && querypage.pageprops.infoboxes ) {
+					try {
+						var infobox = JSON.parse(querypage.pageprops.infoboxes)?.[0];
+						if ( infobox?.parser_tag_version === 2 ) infobox.data.forEach( group => {
+							parse_infobox(group, embed, '');
+						} );
+					}
+					catch ( error ) {
+						console.log( '- Failed to parse the infobox: ' + error );
+					}
+				}
 			}, error => {
 				console.log( '- Error while getting the main page: ' + error );
 			} ).finally( () => {

+ 14 - 2
cmds/wiki/random.js

@@ -1,7 +1,7 @@
 const {MessageEmbed} = require('discord.js');
 const fandom_random = require('./fandom/random.js').run;
 const parse_page = require('../../functions/parse_page.js');
-const {htmlToPlain, htmlToDiscord} = require('../../util/functions.js');
+const {parse_infobox, htmlToPlain, htmlToDiscord} = require('../../util/functions.js');
 const extract_desc = require('../../util/extract_desc.js');
 
 /**
@@ -13,7 +13,7 @@ const extract_desc = require('../../util/extract_desc.js');
  * @param {String} spoiler - If the response is in a spoiler.
  */
 function gamepedia_random(lang, msg, wiki, reaction, spoiler) {
-	got.get( wiki + 'api.php?action=query&meta=siteinfo&siprop=general&prop=pageimages|pageprops|extracts&piprop=original|name&ppprop=description|displaytitle|page_image_free&explaintext=true&exsectionformat=raw&exlimit=1&generator=random&grnnamespace=0&format=json' ).then( response => {
+	got.get( wiki + 'api.php?action=query&meta=siteinfo&siprop=general&prop=pageimages|pageprops|extracts&piprop=original|name&ppprop=description|displaytitle|page_image_free|infoboxes&explaintext=true&exsectionformat=raw&exlimit=1&generator=random&grnnamespace=0&format=json' ).then( response => {
 		var body = response.body;
 		if ( body && body.warnings ) log_warn(body.warnings);
 		if ( response.statusCode !== 200 || !body || body.batchcomplete === undefined || !body.query || !body.query.pages ) {
@@ -58,6 +58,18 @@ function gamepedia_random(lang, msg, wiki, reaction, spoiler) {
 			}
 			else embed.setThumbnail( new URL(body.query.general.logo, wiki).href );
 			
+			if ( !embed.fields.length && querypage.pageprops && querypage.pageprops.infoboxes ) {
+				try {
+					var infobox = JSON.parse(querypage.pageprops.infoboxes)?.[0];
+					if ( infobox?.parser_tag_version === 2 ) infobox.data.forEach( group => {
+						parse_infobox(group, embed, new URL(body.query.general.logo, wiki).href);
+					} );
+				}
+				catch ( error ) {
+					console.log( '- Failed to parse the infobox: ' + error );
+				}
+			}
+			
 			msg.sendChannel( '🎲 ' + spoiler + '<' + pagelink + '>' + spoiler, {embed} ).then( message => parse_page(message, querypage.title, embed, wiki, ( querypage.title === body.query.general.mainpage ? '' : new URL(body.query.general.logo, wiki).href )) );
 		}
 	}, error => {

+ 74 - 15
util/functions.js

@@ -8,26 +8,64 @@ const got = require('got').extend( {
 	responseType: 'json'
 } );
 
+/**
+ * Parse infobox content: turns one infobox item into embed fields or a thumbnail, recursing into groups; item types other than 'data', 'group', 'header' and 'image' are ignored.
+ * @param {Object} infobox - The content of the infobox (one item with a `type` and a `data` property; `data` is an object for 'data'/'header', holds a `value` list for 'group' and is a list for 'image').
+ * @param {import('discord.js').MessageEmbed} embed - The message embed, modified in place (nothing is returned).
+ * @param {String} [thumbnail] - The default thumbnail for the wiki; an infobox image is only used while the embed thumbnail still equals this value.
+ */
+function parse_infobox(infobox, embed, thumbnail) {
+	if ( embed.fields.length >= 25 ) return; // stop adding once the embed holds 25 fields (presumably Discord's per-embed cap — confirm)
+	switch ( infobox.type ) {
+		case 'data':
+			var {label = '', value = ''} = infobox.data; // missing label/value fall back to an empty string
+			label = htmlToPlain(label).trim();
+			value = htmlToPlain(value).trim();
+			if ( label && value ) embed.addField( label, value, true ); // inline field, skipped when either part is empty; NOTE(review): no length limit applied here — confirm addField accepts long text
+			break;
+		case 'group':
+			infobox.data.value.forEach( group => { // a group nests further infobox items in data.value
+				parse_infobox(group, embed, thumbnail);
+			} );
+			break;
+		case 'header':
+			var {value = ''} = infobox.data;
+			value = htmlToPlain(value).trim();
+			if ( value ) embed.addField( '\u200b', '__**' + value + '**__', false ); // zero-width space as the field name, header text bold and underlined, not inline
+			break;
+		case 'image':
+			if ( embed.thumbnail?.url !== thumbnail ) return; // only replace the thumbnail while it is still the passed default
+			var image = infobox.data.find( img => { // first entry with an http(s) or protocol-relative url and a lowercase png/jpg/jpeg/gif name
+				return ( /^(?:https?:)?\/\//.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/.test(img.name) );
+			} );
+			if ( image ) embed.setThumbnail( image.url ); // NOTE(review): a protocol-relative url is passed on unchanged — confirm Discord accepts it
+			break;
+	}
+}
+
 /**
  * Make wikitext formatting usage.
  * @param {String} [text] - The text to modify.
  * @param {Boolean} [showEmbed] - If the text is used in an embed.
- * @param {import('./wiki.js')|String} [args] - The text contains markdown links.
+ * @param {import('./wiki.js')} [wiki] - The wiki.
+ * @param {String} [title] - The page title.
+ * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  * @returns {String}
  */
-function toFormatting(text = '', showEmbed = false, ...args) {
-	if ( showEmbed ) return toMarkdown(text, ...args);
-	else return toPlaintext(text);
+function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
+	if ( showEmbed ) return toMarkdown(text, wiki, title, fullWikitext);
+	else return toPlaintext(text, fullWikitext);
 };
 
 /**
  * Turns wikitext formatting into markdown.
  * @param {String} [text] - The text to modify.
- * @param {import('./wiki.js')} [wiki] - The wiki.
+ * @param {import('./wiki.js')} wiki - The wiki.
  * @param {String} [title] - The page title.
+ * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  * @returns {String}
  */
-function toMarkdown(text = '', wiki, title = '') {
+function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
 	text = text.replace( /[()\\]/g, '\\$&' );
 	var link = null;
 	var regex = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
@@ -36,9 +74,18 @@ function toMarkdown(text = '', wiki, title = '') {
 		var page = wiki.toLink(( /^[#\/]/.test(pagetitle) ? title + ( pagetitle.startsWith( '/' ) ? pagetitle : '' ) : pagetitle ), '', ( pagetitle.startsWith( '#' ) ? pagetitle.substring(1) : '' ), true);
 		text = text.replaceSave( link[0], '[' + link[2] + link[3] + '](' + page + ')' );
 	}
-	regex = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
-	while ( title !== '' && ( link = regex.exec(text) ) !== null ) {
-		text = text.replaceSave( link[0], '[→' + link[1] + '](' + wiki.toLink(title, '', link[1], true) + ')' + ( link[2] ? ': ' + link[2] : '' ) );
+	if ( title !== '' ) {
+		regex = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
+		while ( ( link = regex.exec(text) ) !== null ) {
+			text = text.replaceSave( link[0], '[→' + link[1] + '](' + wiki.toLink(title, '', link[1], true) + ')' + ( link[2] ? ': ' + link[2] : '' ) );
+		}
+	}
+	if ( fullWikitext ) {
+		regex = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
+		while ( ( link = regex.exec(text) ) !== null ) {
+			text = text.replaceSave( link[0], '[' + link[2] + '](https://' + link[1] + ')' );
+		}
+		return htmlToDiscord( text, true, true ).replaceSave( /'''/g, '**' ).replaceSave( /''/g, '*' );
 	}
 	return escapeFormatting(text, true);
 };
@@ -46,7 +93,7 @@ function toMarkdown(text = '', wiki, title = '') {
 /**
  * Removes wikitext formatting.
  * @param {String} [text] - The text to modify.
- * @param {Boolean} [canHTML] - If the text can contain HTML.
+ * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  * @returns {String}
  */
 function toPlaintext(text = '', fullWikitext = false) {
@@ -64,9 +111,17 @@ function toPlaintext(text = '', fullWikitext = false) {
  */
 function htmlToPlain(html) {
 	var text = '';
+	var reference = false;
 	var parser = new htmlparser.Parser( {
+		onopentag: (tagname, attribs) => {
+			if ( tagname === 'sup' && attribs.class === 'reference' ) reference = true;
+			if ( tagname === 'br' ) text += '\n';
+		},
 		ontext: (htmltext) => {
-			text += escapeFormatting(htmltext);
+			if ( !reference ) text += escapeFormatting(htmltext);
+		},
+		onclosetag: (tagname) => {
+			if ( tagname === 'sup' ) reference = false;
 		}
 	} );
 	parser.write( html );
@@ -77,9 +132,10 @@ function htmlToPlain(html) {
 /**
  * Change HTML text to markdown text.
  * @param {String} html - The text in HTML.
+ * @param {Boolean[]} [escapeArgs] - Arguments for the escaping of text formatting.
  * @returns {String}
  */
-function htmlToDiscord(html) {
+function htmlToDiscord(html, ...escapeArgs) {
 	var text = '';
 	var parser = new htmlparser.Parser( {
 		onopentag: (tagname, attribs) => {
@@ -99,7 +155,7 @@ function htmlToDiscord(html) {
 			}
 		},
 		ontext: (htmltext) => {
-			text += escapeFormatting(htmltext);
+			text += escapeFormatting(htmltext, ...escapeArgs);
 		},
 		onclosetag: (tagname) => {
 			switch (tagname) {
@@ -127,15 +183,18 @@ function htmlToDiscord(html) {
  * Escapes formatting.
  * @param {String} [text] - The text to modify.
  * @param {Boolean} [isMarkdown] - The text contains markdown links.
+ * @param {Boolean} [keepLinks] - Don't escape non-markdown links.
  * @returns {String}
  */
-function escapeFormatting(text = '', isMarkdown = false) {
+function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
 	if ( !isMarkdown ) text = text.replace( /[()\\]/g, '\\$&' );
-	return text.replace( /[`_*~:<>{}@|]|\/\//g, '\\$&' );
+	if ( !keepLinks ) text = text.replace( /\/\//g, '\\$&' );
+	return text.replace( /[`_*~:<>{}@|]/g, '\\$&' );
 };
 
 module.exports = {
 	got,
+	parse_infobox,
 	toFormatting,
 	toMarkdown,
 	toPlaintext,