import htmlparser from 'htmlparser2';
import gotDefault from 'got';
/**
 * Shared HTTP client derived from the default `got` export.
 * HTTP error status codes do not throw, each request is limited to 5 seconds,
 * bodies are parsed as JSON and a bot specific User-Agent header is sent.
 * The User-Agent reads the global `isDebug` flag and the `npm_package_version`,
 * `npm_package_name` and `invite` environment variables.
 */
const got = gotDefault.extend( {
	throwHttpErrors: false,
	timeout: {
		request: 5_000
	},
	headers: {
		'User-Agent': `Wiki-Bot/${ isDebug ? 'testing' : process.env.npm_package_version } (Discord; ${ process.env.npm_package_name }${ process.env.invite ? '; ' + process.env.invite : '' })`
	},
	responseType: 'json'
} );
/**
 * Module level map that is exported as `oauthVerify`; nothing in this file reads or writes it.
 * NOTE(review): presumably holds pending OAuth verifications - confirm key and value types with the callers.
 * @type {Map}
 */
const oauthVerify = new Map();
/**
 * Recursively turn parsed infobox content into embed fields.
 * Nothing is added once the embed has 25 fields or is longer than 5400 characters.
 * Section titles and headers are added as fields named with a zero width space;
 * headers and section titles that end up without content below them are removed again.
 * @param {Object} infobox - The content of the infobox.
 * @param {import('discord.js').MessageEmbed} embed - The message embed, modified in place.
 * @param {String} [thumbnail] - The default thumbnail for the wiki.
 * @param {String} [pagelink] - The article path for relative links.
 * @returns {import('discord.js').MessageEmbed?} The embed for a version 5 infobox root, otherwise nothing.
 */
function parse_infobox(infobox, embed, thumbnail, pagelink = '') {
	if ( !infobox ) return;
	if ( embed.fields.length >= 25 || embed.length > 5400 ) return;

	const BLANK = '\u200b';
	// Headers are stored as blank-named fields whose value starts with underlined bold markup.
	const isHeader = field => ( field.name === BLANK && field.value.startsWith( '__**' ) );
	const shorten = str => ( str.length > 100 ? str.substring(0, 100) + '\u2026' : str );
	const parseChildren = children => {
		children.forEach( child => {
			parse_infobox(child, embed, thumbnail, pagelink);
		} );
	};

	if ( infobox.parser_tag_version === 5 ) {
		parseChildren(infobox.data);
		// Drop headers that are not directly followed by a named, non-header field.
		embed.fields = embed.fields.filter( (field, index, all) => {
			if ( !isHeader(field) ) return true;
			const next = all[index + 1];
			return Boolean( next?.name ) && !isHeader(next);
		} );
		return embed;
	}

	const {type, data} = infobox;

	if ( type === 'data' ) {
		const {label: rawLabel = '', value: rawValue = '', source = '', 'item-name': name = ''} = data;
		let label = htmlToPlain(rawLabel, true).trim();
		let value = htmlToDiscord(rawValue, pagelink).trim();
		// Unresolved link placeholders are replaced by the source or item name, if there is one.
		const fallback = ( source || name );
		if ( label.includes( '*UNKNOWN LINK*' ) ) {
			if ( !fallback ) return;
			label = '`' + fallback + '`';
			embed.brokenInfobox = true;
		}
		if ( value.includes( '*UNKNOWN LINK*' ) ) {
			if ( !fallback ) return;
			value = '`' + fallback + '`';
			embed.brokenInfobox = true;
		}
		label = shorten(label);
		if ( value.length > 500 ) value = limitLength(value, 500, 250);
		if ( label && value ) embed.addField( label, value, true );
		return;
	}

	if ( type === 'panel' ) {
		// Only fields added by this panel are candidates for removal.
		const previousCount = embed.fields.length;
		parseChildren(data.value);
		const isNewBlank = (field, index) => ( index >= previousCount && field.name === BLANK );
		embed.fields = embed.fields.filter( (field, index, all) => {
			// First pass: keep a new header only if a regular named field follows it.
			if ( !isNewBlank(field, index) || !field.value.startsWith( '__**' ) ) return true;
			const next = all[index + 1];
			return Boolean( next?.name ) && next.name !== BLANK;
		} ).filter( (field, index, all) => {
			// Second pass: keep a new section title only if something other than a header follows it.
			if ( !isNewBlank(field, index) || field.value.startsWith( '__**' ) ) return true;
			const next = all[index + 1];
			return Boolean( next?.name ) && !isHeader(next);
		} );
		return;
	}

	if ( type === 'section' ) {
		const {label: rawLabel = ''} = data;
		const label = shorten( htmlToPlain(rawLabel).trim() );
		if ( label ) embed.addField( BLANK, '**' + label + '**', false );
	}
	// A section continues like a group: its children are parsed as well.
	if ( type === 'section' || type === 'group' ) {
		parseChildren(data.value);
		return;
	}

	if ( type === 'header' ) {
		const {value: rawValue = ''} = data;
		const value = shorten( htmlToPlain(rawValue).trim() );
		if ( value ) embed.addField( BLANK, '__**' + value + '**__', false );
		return;
	}

	if ( type === 'image' ) {
		// Only replace the thumbnail while it is still the default one.
		if ( embed.thumbnail?.url !== thumbnail ) return;
		const protocol = /^(?:https?:)?\/\//;
		const image = data.find( img => {
			return ( protocol.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/.test(img.name) );
		} );
		if ( image ) embed.setThumbnail( image.url.replace( protocol, 'https://' ) );
	}
}
/**
 * Format wikitext either as markdown or as plain text.
 * @param {String} [text] - The text to modify.
 * @param {Boolean} [showEmbed] - If the text is used in an embed; selects markdown output.
 * @param {import('./wiki.js').default} [wiki] - The wiki, only used for markdown output.
 * @param {String} [title] - The page title, only used for markdown output.
 * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
 * @returns {String}
 */
function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
	return ( showEmbed ? toMarkdown(text, wiki, title, fullWikitext) : toPlaintext(text, fullWikitext) );
}
/**
 * Turns wikitext formatting into markdown.
 * Parentheses and backslashes are escaped first, then wiki links become markdown links.
 * With a page title, section comments also become links to that section.
 * With full wikitext, external links and bold/italic quote markup are converted as well.
 * Relies on `String.prototype.replaceSave`, which is not defined in this file.
 * @param {String} [text] - The text to modify.
 * @param {import('./wiki.js').default} wiki - The wiki, used to build the link targets.
 * @param {String} [title] - The page title, base for section and subpage links.
 * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
 * @returns {String}
 */
function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
	text = text.replace( /[()\\]/g, '\\$&' );

	// The text is rewritten while it is searched, so every search runs against the updated text.
	const wikiLinks = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
	for ( let found = wikiLinks.exec(text); found !== null; found = wikiLinks.exec(text) ) {
		const [wikitext, target, label, trail] = found;
		const pagetitle = ( target || label );
		const isSubpage = pagetitle.startsWith( '/' );
		const isSection = pagetitle.startsWith( '#' );
		// Section and subpage links are relative to the current page title.
		let linkTitle = pagetitle;
		if ( isSubpage || isSection ) linkTitle = title + ( isSubpage ? pagetitle : '' );
		const fragment = ( isSection ? pagetitle.substring(1) : '' );
		const page = wiki.toLink(linkTitle, '', fragment, true);
		text = text.replaceSave( wikitext, '[' + label + trail + '](' + page + ')' );
	}

	if ( title !== '' ) {
		const sectionComments = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
		for ( let found = sectionComments.exec(text); found !== null; found = sectionComments.exec(text) ) {
			const [comment, section, nextChar] = found;
			const sectionLink = '[→' + section + '](' + wiki.toLink(title, '', section, true) + ')';
			text = text.replaceSave( comment, sectionLink + ( nextChar ? ': ' + nextChar : '' ) );
		}
	}

	if ( !fullWikitext ) return escapeFormatting(text, true);

	const externalLinks = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
	for ( let found = externalLinks.exec(text); found !== null; found = externalLinks.exec(text) ) {
		const [wikitext, address, label] = found;
		text = text.replaceSave( wikitext, '[' + label + '](https://' + address + ')' );
	}
	return htmlToDiscord(text, '', true, true).replaceSave( /'''/g, '**' ).replaceSave( /''/g, '*' );
}
/**
 * Removes wikitext formatting.
 * Wiki links are reduced to their label and section comments to an arrow prefixed section name.
 * With full wikitext, external links are reduced to their label as well and the result
 * is passed through the HTML conversion instead of plain escaping.
 * @param {String} [text] - The text to modify.
 * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
 * @returns {String}
 */
function toPlaintext(text = '', fullWikitext = false) {
	const withoutWikiLinks = text.replace( /\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]/g, '$1' );
	const plain = withoutWikiLinks.replace( /\/\*\s*([^\*]+?)\s*\*\//g, '→$1:' );
	if ( !fullWikitext ) return escapeFormatting(plain);
	const withoutExternalLinks = plain.replace( /\[(?:https?:)?\/\/(?:[^ ]+) ([^\]]+)\]/g, '$1' );
	return htmlToDiscord( withoutExternalLinks );
}
/**
 * Change HTML text to plain text.
 * Elements that are hidden (`noexcerpt`, collapsed collapsibles, `display: none`),
 * reference markers and `smwttcontent` spans are skipped together with everything inside them.
 * @param {String} html - The text in HTML.
 * @param {Boolean} [includeComments] - Turn link placeholder comments into `*UNKNOWN LINK*`.
 * @returns {String}
 */
function htmlToPlain(html, includeComments = false) {
	var text = '';
	var ignoredTag = '';
	// Number of same-named tags opened inside the ignored element, so that
	// only the matching close tag ends the ignored section.
	var ignoredDepth = 0;
	var parser = new htmlparser.Parser( {
		onopentag: (tagname, attribs) => {
			if ( ignoredTag ) {
				// Nothing inside an ignored element may produce output or replace the ignored tag.
				if ( tagname === ignoredTag ) ignoredDepth++;
				return;
			}
			let classes = ( attribs.class?.split(' ') ?? [] );
			if ( classes.includes( 'noexcerpt' ) || ( classes.includes( 'mw-collapsible' ) && classes.includes( 'mw-collapsed' ) )
			|| ( attribs.style?.includes( 'display' ) && /(^|;)\s*display\s*:\s*none\s*(;|$)/.test(attribs.style) ) ) {
				ignoredTag = tagname;
				return;
			}
			if ( tagname === 'sup' && classes.includes( 'reference' ) ) ignoredTag = 'sup';
			if ( tagname === 'span' && classes.includes( 'smwttcontent' ) ) ignoredTag = 'span';
			if ( tagname === 'br' ) text += ' ';
		},
		ontext: (htmltext) => {
			if ( !ignoredTag ) {
				// Collapse whitespace and avoid doubled spaces at the seam to the previous text.
				htmltext = htmltext.replace( /[\r\n\t ]+/g, ' ' );
				if ( /[\n ]$/.test(text) && htmltext.startsWith( ' ' ) ) htmltext = htmltext.replace( /^ +/, '' );
				text += escapeFormatting(htmltext);
			}
		},
		onclosetag: (tagname) => {
			if ( tagname !== ignoredTag ) return;
			if ( ignoredDepth ) ignoredDepth--;
			else ignoredTag = '';
		},
		oncomment: (commenttext) => {
			if ( includeComments && /^(?:IW)?LINK'" \d+(?::\d+)?$/.test(commenttext) ) {
				text += '*UNKNOWN LINK*';
			}
		}
	} );
	parser.write( String(html) );
	parser.end();
	return text;
}
/**
 * Change HTML text to markdown text.
 * Hidden elements (`noexcerpt`, `mw-empty-elt`, collapsed collapsibles, `display: none`),
 * reference markers and `smwttcontent` spans are skipped together with everything inside them.
 * Links are only converted when `pagelink` is provided.
 * @param {String} html - The text in HTML.
 * @param {String} [pagelink] - The article path for relative links.
 * @param {...Boolean} [escapeArgs] - Additional arguments passed on to the escaping of text formatting.
 * @returns {String}
 */
function htmlToDiscord(html, pagelink = '', ...escapeArgs) {
	var text = '';
	var code = false;
	var href = '';
	var ignoredTag = '';
	// Number of same-named tags opened inside the ignored element, so that
	// only the matching close tag ends the ignored section.
	var ignoredDepth = 0;
	var syntaxhighlight = '';
	var listlevel = -1;
	var horizontalList = '';
	// Opening and closing markup for each heading level.
	const headingMarkup = new Map( [
		['h1', ['***__', '__***']],
		['h2', ['**__', '__**']],
		['h3', ['**', '**']],
		['h4', ['__', '__']],
		['h5', ['*', '*']],
		['h6', ['', '']]
	] );
	// Remove trailing filler and make sure the following output starts on its own line.
	const startOnNewLine = (trailing) => {
		text = text.replace( trailing, '' );
		if ( !text.endsWith( '\n' ) ) text += '\n';
	};
	var parser = new htmlparser.Parser( {
		onopentag: (tagname, attribs) => {
			if ( ignoredTag ) {
				if ( tagname === ignoredTag ) ignoredDepth++;
				return;
			}
			if ( code ) return;
			let classes = ( attribs.class?.split(' ') ?? [] );
			if ( classes.includes( 'noexcerpt' ) || classes.includes( 'mw-empty-elt' ) || ( classes.includes( 'mw-collapsible' ) && classes.includes( 'mw-collapsed' ) )
			|| ( attribs.style?.includes( 'display' ) && /(^|;)\s*display\s*:\s*none\s*(;|$)/.test(attribs.style) ) ) {
				ignoredTag = tagname;
				return;
			}
			if ( classes.includes( 'hlist' ) ) horizontalList = tagname;
			if ( tagname === 'sup' && classes.includes( 'reference' ) ) ignoredTag = 'sup';
			if ( tagname === 'span' && classes.includes( 'smwttcontent' ) ) ignoredTag = 'span';
			if ( tagname === 'code' ) {
				code = true;
				text += '`';
			}
			if ( tagname === 'pre' ) {
				code = true;
				text += '```' + syntaxhighlight + '\n';
			}
			if ( tagname === 'div' && classes.includes( 'mw-highlight' ) ) {
				// Remember the language so a following code block can be highlighted.
				syntaxhighlight = ( classes.find( syntax => syntax.startsWith( 'mw-highlight-lang-' ) )?.replace( 'mw-highlight-lang-', '' ) || '' );
			}
			if ( tagname === 'b' || tagname === 'strong' ) text += '**';
			if ( tagname === 'i' ) text += '*';
			if ( tagname === 's' ) text += '~~';
			if ( tagname === 'u' ) text += '__';
			if ( tagname === 'br' ) {
				text += '\n';
				if ( listlevel > -1 ) text += '\u200b '.repeat(4 * listlevel + 3);
			}
			if ( tagname === 'hr' ) {
				startOnNewLine( / +$/ );
				text += '─'.repeat(10) + '\n';
			}
			if ( tagname === 'p' && !text.endsWith( '\n' ) ) text += '\n';
			if ( tagname === 'ul' || tagname === 'ol' || tagname === 'dl' ) {
				// Nested lists are wrapped in parentheses.
				if ( ++listlevel ) text += ' (';
			}
			if ( tagname === 'li' && !horizontalList ) {
				startOnNewLine( /[ \u200b]+$/ );
				if ( listlevel > -1 ) text += '\u200b '.repeat(4 * listlevel);
				text += '• ';
			}
			if ( tagname === 'dt' && !horizontalList ) {
				startOnNewLine( /[ \u200b]+$/ );
				if ( listlevel > -1 ) text += '\u200b '.repeat(4 * listlevel);
				text += '**';
			}
			if ( tagname === 'dd' && !horizontalList ) {
				startOnNewLine( /[ \u200b]+$/ );
				if ( listlevel > -1 ) text += '\u200b '.repeat(4 * (listlevel + 1));
			}
			if ( tagname === 'img' ) {
				if ( attribs.alt && attribs.src ) {
					// The alt text is only shown when it is not just the file name of the image.
					let showAlt = true;
					if ( attribs['data-image-name'] === attribs.alt ) showAlt = false;
					else {
						let regex = new RegExp( '/([\\da-f])/\\1[\\da-f]/' + attribs.alt.replace( / /g, '_' ).replace( /\W/g, '\\$&' ) + '(?:/|\\?|$)' );
						if ( attribs.src.startsWith( 'data:' ) && attribs['data-src'] ) attribs.src = attribs['data-src'];
						if ( regex.test(attribs.src.replace( /(?:%[\dA-F]{2})+/g, partialURIdecode )) ) showAlt = false;
					}
					if ( showAlt ) {
						if ( href && !code ) attribs.alt = attribs.alt.replace( /[\[\]]/g, '\\$&' );
						if ( code ) text += attribs.alt.replace( /`/g, 'ˋ' );
						else text += escapeFormatting(attribs.alt, ...escapeArgs);
					}
				}
			}
			const heading = headingMarkup.get(tagname);
			if ( heading ) {
				startOnNewLine( / +$/ );
				text += heading[0];
			}
			if ( !pagelink ) return;
			if ( tagname === 'a' && attribs.href && !classes.includes( 'new' ) && /^(?:(?:https?:)?\/\/|\/|#)/.test(attribs.href) ) {
				href = new URL(attribs.href, pagelink).href.replace( /[()]/g, '\\$&' );
				if ( text.endsWith( '](<' + href + '>)' ) ) {
					// Directly adjacent links to the same target are merged into one link.
					text = text.substring(0, text.length - ( href.length + 5 ));
				}
				else text += '[';
			}
		},
		ontext: (htmltext) => {
			if ( !ignoredTag ) {
				if ( href && !code ) htmltext = htmltext.replace( /[\[\]]/g, '\\$&' );
				if ( code ) text += htmltext.replace( /`/g, 'ˋ' );
				else {
					htmltext = htmltext.replace( /[\r\n\t ]+/g, ' ' );
					if ( /[\n ]$/.test(text) && htmltext.startsWith( ' ' ) ) {
						htmltext = htmltext.replace( /^ +/, '' );
					}
					text += escapeFormatting(htmltext, ...escapeArgs);
				}
			}
		},
		onclosetag: (tagname) => {
			if ( ignoredTag ) {
				// Close tags inside an ignored element must not emit markup or change the list state.
				if ( tagname === ignoredTag ) {
					if ( ignoredDepth ) ignoredDepth--;
					else ignoredTag = '';
				}
				return;
			}
			if ( code ) {
				if ( tagname === 'code' ) {
					code = false;
					text += '`';
				}
				if ( tagname === 'pre' ) {
					code = false;
					text += '\n```';
				}
				return;
			}
			if ( syntaxhighlight && tagname === 'div' ) syntaxhighlight = '';
			if ( tagname === 'b' || tagname === 'strong' ) text += '**';
			if ( tagname === 'i' ) text += '*';
			if ( tagname === 's' ) text += '~~';
			if ( tagname === 'u' ) text += '__';
			if ( tagname === 'dl' && horizontalList ) text = text.replace( /: $/, '' );
			if ( tagname === 'ul' || tagname === 'ol' || tagname === 'dl' ) {
				if ( horizontalList ) text = text.replace( / • $/, '' );
				if ( listlevel-- ) text += ')';
			}
			if ( ( tagname === 'li' || tagname === 'dd' ) && horizontalList ) text += ' • ';
			if ( tagname === 'dt' ) {
				text += '**';
				if ( horizontalList ) text += ': ';
			}
			if ( tagname === horizontalList ) horizontalList = '';
			const heading = headingMarkup.get(tagname);
			if ( heading ) text += heading[1];
			if ( !pagelink ) return;
			if ( tagname === 'a' && href ) {
				// A link without any text is dropped again.
				if ( text.endsWith( '[' ) ) text = text.substring(0, text.length - 1);
				else text += '](<' + href + '>)';
				href = '';
			}
		},
		oncomment: (commenttext) => {
			if ( pagelink && /^(?:IW)?LINK'" \d+(?::\d+)?$/.test(commenttext) ) {
				text += '*UNKNOWN LINK*';
			}
		}
	} );
	parser.write( String(html) );
	parser.end();
	return text;
}
/**
 * Escapes formatting.
 * Unless the text already contains markdown links, backslashes are doubled and `](` is broken up.
 * Formatting characters and double slashes are always escaped; with `keepLinks`
 * the escaping inside of plain http(s) links is removed again.
 * @param {String} [text] - The text to modify.
 * @param {Boolean} [isMarkdown] - The text contains markdown links.
 * @param {Boolean} [keepLinks] - Don't escape non-markdown links.
 * @returns {String}
 */
function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
	let escaped = text;
	if ( !isMarkdown ) {
		escaped = escaped.replace( /\\/g, '\\\\' );
		escaped = escaped.replace( /\]\(/g, ']\\(' );
	}
	escaped = escaped.replace( /[`_*~:<>{}@|]/g, '\\$&' );
	escaped = escaped.replace( /\/\//g, '/\\/' );
	if ( !keepLinks ) return escaped;
	// Inside a link a doubled backslash becomes a slash and every other backslash is dropped.
	const unescapeLink = match => match.replace( /\\\\/g, '/' ).replace( /\\/g, '' );
	return escaped.replace( /(?:\\<)?https?\\:\/\\\/(?:[^\(\)\s]+(?=\))|[^\[\]\s]+(?=\])|[^<>\s]+>?)/g, unescapeLink );
}
/**
 * Limit text length without breaking link formatting.
 * The text is cut at `limit` and an ellipsis is appended. If the cut would land inside a
 * markdown link, the text is cut before the link instead and the whole link - or, if that
 * is too long, only its label - is appended as long as it ends before `limit + maxExtra`.
 * An ellipsis is appended even when the text is shorter than `limit`, so callers are
 * expected to check the length first.
 * @param {String} [text] - The text to modify.
 * @param {Number} [limit] - The character limit.
 * @param {Number} [maxExtra] - The maximal allowed character limit if needed.
 * @returns {String}
 */
function limitLength(text = '', limit = 1000, maxExtra = 20) {
	var suffix = '\u2026';
	var link = null;
	// Unescaped markdown link `[label](target)` or `[label](<target>)`, the label is captured.
	// NOTE(review): the pattern was not parseable before and has been reconstructed to match
	// the `[label](<url>)` links written by htmlToDiscord - confirm against the intended pattern.
	var regex = /(?<!\\)\[((?:[^\[\]]|\\[\[\]])*?[^\\])\]\(<?(?:[^<>]|\\[<>])*?[^\\]>?\)/g;
	while ( ( link = regex.exec(text) ) !== null ) {
		let linkEnd = link.index + link[0].length;
		if ( link.index < limit && linkEnd > limit ) {
			// The limit falls inside this link: cut before it and keep as much of it as allowed.
			let kept = null;
			if ( linkEnd < limit + maxExtra ) kept = link[0];
			else if ( link.index + link[1].length < limit + maxExtra ) kept = link[1];
			// Only mark more text after a kept link; a dropped link is already marked by the default suffix.
			if ( kept !== null ) suffix = kept + ( linkEnd < text.length ? '\u2026' : '' );
			limit = link.index;
			break;
		}
		else if ( link.index >= limit ) break;
	}
	return text.substring(0, limit) + suffix;
}
/**
 * Try to URI decode.
 * Falls back to the unchanged input when decoding fails; the failure is only logged in debug mode.
 * @param {String} m - The character to decode.
 * @returns {String}
 */
function partialURIdecode(m) {
	try {
		return decodeURIComponent( m );
	}
	catch ( decodeError ) {
		if ( isDebug ) console.log( '- Failed to decode ' + m + ':' + decodeError );
		return m;
	}
}
/**
 * Check for timeout or pause.
 * Outside of guilds nothing is blocked. Inside a guild the action is aborted when the member
 * or the bot itself has communication disabled, or when the guild is listed in the global
 * `pausedGuilds`. Each abort reason is logged to the console.
 * @param {import('discord.js').Message|import('discord.js').Interaction} msg - The message.
 * @param {Boolean} [ignorePause] - Ignore pause for admins.
 * @returns {Boolean} `true` if the action has to be aborted.
 */
function breakOnTimeoutPause(msg, ignorePause = false) {
	const abort = (reason) => {
		console.log( reason );
		return true;
	};
	if ( !msg.inGuild() ) return false;
	if ( msg.member?.isCommunicationDisabled() ) return abort( '- Aborted, communication disabled for User.' );
	if ( msg.guild?.me.isCommunicationDisabled() ) return abort( '- Aborted, communication disabled for Wiki-Bot.' );
	if ( !pausedGuilds.has(msg.guildId) ) return false;
	// Admins and owners can still use the bot while paused, but only where the caller allows it.
	const mayBypassPause = ( ignorePause && ( msg.isAdmin() || msg.isOwner() ) );
	if ( mayBypassPause ) return false;
	return abort( '- Aborted, guild paused.' );
}
/**
 * Allow users to delete their command responses.
 * Waits up to 5 minutes for a single wastebasket reaction by the given user and
 * deletes the response once it arrives. Does nothing if the response is missing
 * or can't await reactions.
 * @param {import('discord.js').Message} msg - The response.
 * @param {String} author - The user id.
 */
function allowDelete(msg, author) {
	const isDeleteRequest = (reaction, user) => ( reaction.emoji.name === '🗑️' && user.id === author );
	msg?.awaitReactions?.( {
		filter: isDeleteRequest,
		max: 1,
		time: 300_000
	} ).then( reactions => {
		if ( !reactions.size ) return;
		msg.delete().catch(log_error);
	} );
}
/**
 * Sends an interaction response.
 * Non-ephemeral, deletable responses are skipped while the user or bot is timed out or the guild is paused.
 * An embed list whose first entry is empty is replaced by an empty list before sending.
 * Errors while editing the reply are passed to `log_error`, the promise then resolves with its result.
 * @param {import('discord.js').CommandInteraction|import('discord.js').ButtonInteraction} interaction - The interaction.
 * @param {import('discord.js').MessageOptions} message - The message.
 * @param {Boolean} [letDelete] - Let the interaction user delete the message.
 * @returns {Promise}
 */
function sendMessage(interaction, message, letDelete = true) {
	const isPublic = !interaction.ephemeral;
	if ( isPublic && letDelete && breakOnTimeoutPause(interaction) ) return Promise.resolve();
	if ( message?.embeds?.length && !message.embeds[0] ) message.embeds = [];
	const afterSend = (msg) => {
		// Flag 64 marks the sent message as ephemeral, those can't be deleted by reaction.
		const isEphemeral = ( (msg.flags & 64) === 64 );
		if ( letDelete && !isEphemeral ) allowDelete(msg, interaction.user.id);
		return msg;
	};
	return interaction.editReply( message ).then( afterSend, log_error );
}
export {
got,
oauthVerify,
parse_infobox,
toFormatting,
toMarkdown,
toPlaintext,
htmlToPlain,
htmlToDiscord,
escapeFormatting,
limitLength,
partialURIdecode,
breakOnTimeoutPause,
allowDelete,
sendMessage
};