const htmlparser = require('htmlparser2');
/**
 * Shared HTTP client: the `got` module extended with the defaults below.
 * It is exported from this module so other files can reuse the same settings.
 */
const got = require('got').extend( {
// Per got's documentation: responses with an error status code are returned instead of thrown.
throwHttpErrors: false,
// Per got's documentation the timeout is given in milliseconds.
timeout: 5000,
headers: {
// Uses 'testing' instead of the package version when `isDebug` is truthy.
// NOTE(review): `isDebug` is not defined in this part of the file, presumably a global set elsewhere, confirm.
'User-Agent': 'Wiki-Bot/' + ( isDebug ? 'testing' : process.env.npm_package_version ) + ' (Discord; ' + process.env.npm_package_name + ')'
},
// Per got's documentation: response bodies are parsed as JSON.
responseType: 'json'
} );
/**
 * Parse infobox content and add it to the embed.
 *
 * A top-level infobox (`parser_tag_version === 2`) is walked recursively; every other
 * node is handled by its `type`: `data` becomes an inline field, `header` becomes a
 * section heading field, `group` is walked recursively and `image` may replace the
 * default wiki thumbnail.
 * @param {Object} infobox - The content of the infobox.
 * @param {import('discord.js').MessageEmbed} embed - The message embed, modified in place.
 * @param {String} [thumbnail] - The default thumbnail for the wiki.
 * @returns {import('discord.js').MessageEmbed?} The embed for a top-level infobox, otherwise nothing.
 */
function parse_infobox(infobox, embed, thumbnail) {
	// Stop once the embed is (nearly) full; presumably these mirror Discord's embed limits.
	if ( !infobox || embed.fields.length >= 25 || embed.length > 5500 ) return;
	if ( infobox.parser_tag_version === 2 ) {
		infobox.data.forEach( group => {
			parse_infobox(group, embed, thumbnail);
		} );
		// Drop header fields (name '\u200b') that are not directly followed by a regular field.
		// `fields` is the unfiltered array, so neighbours are checked as they were before filtering.
		embed.fields = embed.fields.filter( (field, i, fields) => {
			if ( field.name !== '\u200b' ) return true;
			return ( fields[i + 1]?.name && fields[i + 1].name !== '\u200b' );
		} );
		return embed;
	}
	switch ( infobox.type ) {
		case 'data': {
			let {label = '', value = '', source = ''} = infobox.data;
			label = htmlToPlain(label).trim();
			value = htmlToPlain(value).trim();
			// An unresolved link marker makes the text useless, show the source parameter name instead.
			if ( label.includes( '*UNKNOWN LINK*' ) ) {
				label = '`' + source + '`';
				embed.brokenInfobox = true;
			}
			if ( value.includes( '*UNKNOWN LINK*' ) ) {
				value = '`' + source + '`';
				embed.brokenInfobox = true;
			}
			if ( label.length > 50 ) label = label.substring(0, 50) + '\u2026';
			if ( value.length > 250 ) value = value.substring(0, 250) + '\u2026';
			if ( label && value ) embed.addField( label, value, true );
			break;
		}
		case 'group':
			infobox.data.value.forEach( group => {
				parse_infobox(group, embed, thumbnail);
			} );
			break;
		case 'header': {
			let {value = ''} = infobox.data;
			value = htmlToPlain(value).trim();
			if ( value.length > 100 ) value = value.substring(0, 100) + '\u2026';
			if ( value ) embed.addField( '\u200b', '__**' + value + '**__', false );
			break;
		}
		case 'image': {
			// Only replace the thumbnail while it is still the default one of the wiki.
			if ( embed.thumbnail?.url !== thumbnail ) return;
			// The extension check ignores case so that files like "Foo.PNG" or "Bar.JPG" are accepted too.
			const image = infobox.data.find( img => {
				return ( /^(?:https?:)?\/\//.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/i.test(img.name) );
			} );
			// Normalise protocol-relative and http URLs to https.
			if ( image ) embed.setThumbnail( image.url.replace( /^(?:https?:)?\/\//, 'https://' ) );
			break;
		}
	}
}
/**
 * Formats wikitext either as markdown (for embeds) or as escaped plain text.
 * @param {String} [text] - The text to modify.
 * @param {Boolean} [showEmbed] - If the text is used in an embed.
 * @param {import('./wiki.js')} [wiki] - The wiki, only passed on for the markdown variant.
 * @param {String} [title] - The page title, only passed on for the markdown variant.
 * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
 * @returns {String}
 */
function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
	return ( showEmbed ? toMarkdown(text, wiki, title, fullWikitext) : toPlaintext(text, fullWikitext) );
}
/**
 * Converts wikitext links (and optionally full wikitext) into markdown.
 *
 * NOTE(review): `replaceSave` is not defined in this part of the file, presumably a
 * String.prototype extension that replaces without interpreting `$` patterns; confirm.
 * @param {String} [text] - The text to modify.
 * @param {import('./wiki.js')} wiki - The wiki, used to build the link targets.
 * @param {String} [title] - The page title, used for section and subpage links.
 * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
 * @returns {String}
 */
function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
	// Escape parentheses and backslashes before any markdown link syntax is added.
	text = text.replace( /[()\\]/g, '\\$&' );

	// Wiki links: [[Page]], [[Page|label]], [[#Section]] and [[/Subpage]], plus a lowercase link trail.
	// The text is rewritten while scanning; the regex keeps its position between iterations.
	const wikiLinks = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
	for ( let found = wikiLinks.exec(text); found !== null; found = wikiLinks.exec(text) ) {
		const [wikitext, target, label, trail] = found;
		const destination = ( target || label );
		let linkedPage = destination;
		let fragment = '';
		if ( destination.startsWith( '#' ) ) {
			// Section on the current page.
			linkedPage = title;
			fragment = destination.substring(1);
		}
		else if ( destination.startsWith( '/' ) ) {
			// Subpage of the current page.
			linkedPage = title + destination;
		}
		const url = wiki.toLink(linkedPage, '', fragment, true);
		text = text.replaceSave( wikitext, '[' + label + trail + '](' + url + ')' );
	}

	// Section markers of the form /* Section */ become links to that section of the page.
	if ( title !== '' ) {
		const sectionMarkers = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
		for ( let found = sectionMarkers.exec(text); found !== null; found = sectionMarkers.exec(text) ) {
			const [marker, section, following] = found;
			const url = wiki.toLink(title, '', section, true);
			const suffix = ( following ? ': ' + following : '' );
			text = text.replaceSave( marker, '[→' + section + '](' + url + ')' + suffix );
		}
	}

	if ( !fullWikitext ) return escapeFormatting(text, true);

	// External links: [https://example.org label]
	const externalLinks = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
	for ( let found = externalLinks.exec(text); found !== null; found = externalLinks.exec(text) ) {
		const [wikitext, address, label] = found;
		text = text.replaceSave( wikitext, '[' + label + '](https://' + address + ')' );
	}
	// Bold (''') has to be replaced before italic ('').
	const markdown = htmlToDiscord( text, true, true );
	return markdown.replaceSave( /'''/g, '**' ).replaceSave( /''/g, '*' );
}
/**
 * Strips wikitext links down to their label and escapes the remaining text.
 * @param {String} [text] - The text to modify.
 * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
 * @returns {String}
 */
function toPlaintext(text = '', fullWikitext = false) {
	// [[Page|label]] and [[Page]] are reduced to the visible label.
	const withoutWikiLinks = text.replace( /\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]/g, '$1' );
	// Section markers of the form /* Section */ are shown as an arrow prefix.
	const withSectionArrows = withoutWikiLinks.replace( /\/\*\s*([^\*]+?)\s*\*\//g, '→$1:' );
	if ( !fullWikitext ) return escapeFormatting(withSectionArrows);
	// External links [https://example.org label] are reduced to the label as well.
	const withoutExternalLinks = withSectionArrows.replace( /\[(?:https?:)?\/\/(?:[^ ]+) ([^\]]+)\]/g, '$1' );
	return htmlToPlain( withoutExternalLinks );
}
/**
 * Change HTML text to plain text.
 *
 * Line breaks, horizontal rules, paragraphs and unordered lists are rendered with
 * newlines, bullets and indentation; text inside `<sup class="reference">` is dropped
 * and all remaining text is passed through `escapeFormatting`.
 * @param {String} html - The text in HTML.
 * @returns {String}
 */
function htmlToPlain(html) {
	let output = '';
	let insideReference = false;
	// -1 means "not inside a list", 0 is the first list level.
	let listDepth = -1;

	// Indentation for the current list level, built from zero-width space + space pairs.
	const indent = (extra = 0) => ( listDepth > -1 ? '\u200b '.repeat(4 * listDepth + extra) : '' );
	const ensureNewline = () => {
		if ( !output.endsWith( '\n' ) ) output += '\n';
	};

	const parser = new htmlparser.Parser( {
		onopentag: (tagname, attribs) => {
			switch ( tagname ) {
				case 'sup':
					if ( attribs.class === 'reference' ) insideReference = true;
					break;
				case 'br':
					// Continuation lines of a list item are aligned behind the bullet.
					output += '\n' + indent(3);
					break;
				case 'hr':
					ensureNewline();
					output += '─'.repeat(10) + '\n';
					break;
				case 'p':
					ensureNewline();
					break;
				case 'ul':
					listDepth++;
					break;
				case 'li':
					ensureNewline();
					output += indent() + '• ';
					break;
			}
		},
		ontext: (htmltext) => {
			if ( insideReference ) return;
			if ( listDepth > -1 ) {
				htmltext = htmltext.replace( /\n/g, '\n' + '\u200b '.repeat(4 * listDepth + 3) );
			}
			output += escapeFormatting(htmltext);
		},
		onclosetag: (tagname) => {
			// Any closing <sup> ends the reference, any closing <ul> leaves one list level.
			if ( tagname === 'sup' ) insideReference = false;
			else if ( tagname === 'ul' ) listDepth--;
		},
		oncomment: (commenttext) => {
			// Presumably a placeholder comment left behind for a link that could not be resolved.
			if ( /^LINK'" \d+:\d+$/.test(commenttext) ) output += '*UNKNOWN LINK*';
		}
	} );
	parser.write( html );
	parser.end();
	return output;
}
/**
 * Change HTML text to markdown text.
 *
 * `<b>`, `<i>`, `<s>` and `<u>` tags are turned into the matching markdown markers,
 * every other tag is dropped and all text is passed through `escapeFormatting`.
 * @param {String} html - The text in HTML.
 * @param {Boolean[]} [escapeArgs] - Arguments for the escaping of text formatting.
 * @returns {String}
 */
function htmlToDiscord(html, ...escapeArgs) {
	// The same marker opens and closes a format, so one table serves both handlers.
	// A Map is used so that tag names never collide with inherited object properties.
	const markers = new Map( [
		['b', '**'],
		['i', '*'],
		['s', '~~'],
		['u', '__']
	] );
	let markdown = '';
	const appendMarker = (tagname) => {
		if ( markers.has(tagname) ) markdown += markers.get(tagname);
	};
	const parser = new htmlparser.Parser( {
		onopentag: appendMarker,
		ontext: (htmltext) => {
			markdown += escapeFormatting(htmltext, ...escapeArgs);
		},
		onclosetag: appendMarker
	} );
	parser.write( html );
	parser.end();
	return markdown;
}
/**
 * Escapes characters that would otherwise be interpreted as formatting.
 * @param {String} [text] - The text to modify.
 * @param {Boolean} [isMarkdown] - The text contains markdown links, so parentheses and backslashes are kept.
 * @param {Boolean} [keepLinks] - Don't escape non-markdown links, so `//` is kept.
 * @returns {String}
 */
function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
	// The order matters: backslashes in the input are escaped before further backslashes are added.
	const patterns = [];
	if ( !isMarkdown ) patterns.push( /[()\\]/g );
	if ( !keepLinks ) patterns.push( /\/\//g );
	patterns.push( /[`_*~:<>{}@|]/g );
	return patterns.reduce( (escaped, pattern) => escaped.replace( pattern, '\\$&' ), text );
}
// Public API of this module: the preconfigured HTTP client and the text formatting helpers.
module.exports = {
got,
parse_infobox,
toFormatting,
toMarkdown,
toPlaintext,
htmlToPlain,
htmlToDiscord,
escapeFormatting
};