functions.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  // HTML tokenizer used by htmlToPlain() and htmlToDiscord() below.
  const htmlparser = require('htmlparser2');

  // Shared HTTP client: HTTP error statuses do not throw, responses are parsed
  // as JSON, and every request identifies the bot through its User-Agent.
  const got = require('got').extend({
    throwHttpErrors: false,
    timeout: 5000,
    headers: {
      'User-Agent': `Wiki-Bot/${isDebug ? 'testing' : process.env.npm_package_version} (Discord; ${process.env.npm_package_name})`,
    },
    responseType: 'json',
  });

  // Slash command definitions loaded from the interactions folder.
  const slashCommands = require('../interactions/commands.json');

  /**
   * Pending OAuth2 verifications; exported so other modules can share the map.
   * @type {Map<String, {state: String, wiki: String, channel: import('discord.js').TextChannel, user: String}>}
   */
  const oauthVerify = new Map();
  /**
   * Parse infobox content and add it to the embed.
   *
   * Walks the infobox structure recursively: `data` items become inline fields,
   * `header` and `section` items become zero-width-named title fields, `panel`,
   * `section` and `group` items are descended into, and `image` items may
   * replace the thumbnail while it is still the wiki default.
   * Nothing is added once the embed has 25 fields or is longer than 5400.
   *
   * @param {Object} infobox - The content of the infobox.
   * @param {import('discord.js').MessageEmbed} embed - The message embed, modified in place.
   * @param {String} [thumbnail] - The default thumbnail for the wiki.
   * @param {String} [pagelink] - The article path for relative links.
   * @returns {import('discord.js').MessageEmbed?} The embed for a top-level
   * infobox (`parser_tag_version === 2`), otherwise nothing.
   */
  function parse_infobox(infobox, embed, thumbnail, pagelink = '') {
    if (!infobox || embed.fields.length >= 25 || embed.length > 5400) return;

    // Title fields are stored under a zero-width-space name; headers are the
    // ones whose value is wrapped in '__**'.
    const isBlank = (field) => field.name === '\u200b';
    const isHeader = (field) => isBlank(field) && field.value.startsWith('__**');
    const parseChildren = (children) => {
      children.forEach((child) => {
        parse_infobox(child, embed, thumbnail, pagelink);
      });
    };

    if (infobox.parser_tag_version === 2) {
      parseChildren(infobox.data);
      // Drop headers that are not followed by any content of their own.
      embed.fields = embed.fields.filter((field, i, fields) => {
        if (!isHeader(field)) return true;
        const next = fields[i + 1];
        return Boolean(next?.name) && !isHeader(next);
      });
      return embed;
    }

    switch (infobox.type) {
      case 'data': {
        const {label: rawLabel = '', value: rawValue = '', source = '', 'item-name': name = ''} = infobox.data;
        let label = htmlToPlain(rawLabel).trim();
        let value = htmlToDiscord(rawValue, pagelink).trim();
        // Unresolved link placeholders are replaced by the raw source name.
        const fallback = source || name;
        if (label.includes('*UNKNOWN LINK*')) {
          if (!fallback) break;
          label = '`' + fallback + '`';
          embed.brokenInfobox = true;
        }
        if (value.includes('*UNKNOWN LINK*')) {
          if (!fallback) break;
          value = '`' + fallback + '`';
          embed.brokenInfobox = true;
        }
        if (label.length > 100) label = label.substring(0, 100) + '\u2026';
        if (value.length > 500) value = limitLength(value, 500, 250);
        if (label && value) embed.addField(label, value, true);
        break;
      }
      case 'panel': {
        // Only fields added by this panel are candidates for removal.
        const embedLength = embed.fields.length;
        parseChildren(infobox.data.value);
        embed.fields = embed.fields.filter((field, i, fields) => {
          // New headers survive only when directly followed by a regular field.
          if (i < embedLength || !isHeader(field)) return true;
          const next = fields[i + 1];
          return Boolean(next?.name) && !isBlank(next);
        }).filter((field, i, fields) => {
          // New section titles survive only when followed by something other than a header.
          if (i < embedLength || !isBlank(field) || isHeader(field)) return true;
          const next = fields[i + 1];
          return Boolean(next?.name) && !isHeader(next);
        });
        break;
      }
      case 'section': {
        const {label: rawLabel = ''} = infobox.data;
        let label = htmlToPlain(rawLabel).trim();
        if (label.length > 100) label = label.substring(0, 100) + '\u2026';
        if (label) embed.addField('\u200b', '**' + label + '**', false);
      }
      // falls through: the children of a section are parsed like those of a group
      case 'group':
        parseChildren(infobox.data.value);
        break;
      case 'header': {
        const {value: rawValue = ''} = infobox.data;
        let value = htmlToPlain(rawValue).trim();
        if (value.length > 100) value = value.substring(0, 100) + '\u2026';
        if (value) embed.addField('\u200b', '__**' + value + '**__', false);
        break;
      }
      case 'image': {
        // Keep a thumbnail that is no longer the wiki default.
        if (embed.thumbnail?.url !== thumbnail) return;
        // The extension check ignores case so that e.g. "Photo.PNG" is accepted.
        const image = infobox.data.find((img) => {
          return /^(?:https?:)?\/\//.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/i.test(img.name);
        });
        if (image) embed.setThumbnail(image.url.replace(/^(?:https?:)?\/\//, 'https://'));
        break;
      }
    }
  }
  /**
   * Format wikitext for output, as markdown inside embeds or as plain text otherwise.
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [showEmbed] - If the text is used in an embed.
   * @param {import('./wiki.js')} [wiki] - The wiki, only used for the markdown variant.
   * @param {String} [title] - The page title, only used for the markdown variant.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
    return showEmbed
      ? toMarkdown(text, wiki, title, fullWikitext)
      : toPlaintext(text, fullWikitext);
  }
  /**
   * Turns wikitext formatting into markdown.
   *
   * Converts `[[wiki links]]`, `/* section *\/` markers (only when a title is
   * given) and, for full wikitext, `[url label]` links plus bold/italic quotes.
   * Replacements go through `String#replaceSave`, which is not defined in this
   * file (presumably a String.prototype extension set up elsewhere).
   *
   * @param {String} [text] - The text to modify.
   * @param {import('./wiki.js')} wiki - The wiki, used to build links with `toLink`.
   * @param {String} [title] - The page title, used for section and relative links.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
    // Escape the characters that would break the markdown link syntax.
    let output = text.replace(/[()\\]/g, '\\$&');
    let match = null;

    const wikiLinks = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
    while ((match = wikiLinks.exec(output)) !== null) {
      const [whole, target, label, trail] = match;
      const destination = target || label;
      // "#section" and "/subpage" targets are relative to the current page.
      let pageName = destination;
      if (/^[#\/]/.test(destination)) {
        pageName = title + (destination.startsWith('/') ? destination : '');
      }
      const fragment = destination.startsWith('#') ? destination.substring(1) : '';
      const url = wiki.toLink(pageName, '', fragment, true);
      output = output.replaceSave(whole, '[' + label + trail + '](' + url + ')');
    }

    if (title !== '') {
      const sectionMarkers = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
      while ((match = sectionMarkers.exec(output)) !== null) {
        const [whole, section, following] = match;
        const sectionLink = '[→' + section + '](' + wiki.toLink(title, '', section, true) + ')';
        // Only add the separator when something follows the marker.
        const rest = following ? ': ' + following : '';
        output = output.replaceSave(whole, sectionLink + rest);
      }
    }

    if (!fullWikitext) return escapeFormatting(output, true);

    const externalLinks = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
    while ((match = externalLinks.exec(output)) !== null) {
      const [whole, address, label] = match;
      output = output.replaceSave(whole, '[' + label + '](https://' + address + ')');
    }
    const converted = htmlToDiscord(output, '', true, true);
    return converted.replaceSave(/'''/g, '**').replaceSave(/''/g, '*');
  }
  /**
   * Removes wikitext formatting.
   *
   * Wiki links are reduced to their label and section markers become
   * "→section:". For full wikitext, external links are reduced to their label
   * as well and the result is run through htmlToDiscord instead of being escaped.
   *
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toPlaintext(text = '', fullWikitext = false) {
    const withoutWikiLinks = text.replace(/\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]/g, '$1');
    const plain = withoutWikiLinks.replace(/\/\*\s*([^\*]+?)\s*\*\//g, '→$1:');
    if (!fullWikitext) return escapeFormatting(plain);
    const withoutExternalLinks = plain.replace(/\[(?:https?:)?\/\/(?:[^ ]+) ([^\]]+)\]/g, '$1');
    return htmlToDiscord(withoutExternalLinks);
  }
  /**
   * Change HTML text to plain text.
   *
   * Tags are dropped, `<br>` becomes a space, whitespace runs collapse into a
   * single space and the remaining text is passed through escapeFormatting.
   * The content of `<sup class="reference">` and `<span class="smwttcontent">`
   * is skipped entirely, including elements of the same name nested inside.
   *
   * @param {String} html - The text in HTML.
   * @returns {String}
   */
  function htmlToPlain(html) {
    let text = '';
    // Name of the element whose content is currently skipped, '' when none.
    let ignoredTag = '';
    // Open elements named like ignoredTag, so a nested one does not end the skip early.
    let ignoredDepth = 0;

    const parser = new htmlparser.Parser({
      onopentag: (tagname, attribs) => {
        if (ignoredTag) {
          if (tagname === ignoredTag) ignoredDepth++;
          return;
        }
        const isReference = tagname === 'sup' && attribs.class === 'reference';
        const isTooltip = tagname === 'span' && attribs.class === 'smwttcontent';
        if (isReference || isTooltip) {
          ignoredTag = tagname;
          ignoredDepth = 1;
          return;
        }
        if (tagname === 'br') text += ' ';
      },
      ontext: (htmltext) => {
        if (ignoredTag) return;
        let content = htmltext.replace(/[\r\n\t ]+/g, ' ');
        // Avoid doubled spaces across text nodes.
        if (/[\n ]$/.test(text) && content.startsWith(' ')) content = content.replace(/^ +/, '');
        text += escapeFormatting(content);
      },
      onclosetag: (tagname) => {
        if (!ignoredTag || tagname !== ignoredTag) return;
        ignoredDepth--;
        if (ignoredDepth === 0) ignoredTag = '';
      },
    });
    parser.write(html);
    parser.end();
    return text;
  }
  /**
   * Change HTML text to markdown text.
   *
   * Supported markup: bold/italic/strike/underline, headings, paragraphs, line
   * breaks, horizontal rules, (definition) lists, inline code and `<pre>`
   * blocks (with the language of an enclosing `mw-highlight` div), image alt
   * texts and, when `pagelink` is given, links resolved against it.
   * The content of `<sup class="reference">` and `<span class="smwttcontent">`
   * is skipped. Unresolved link comments become `*UNKNOWN LINK*`.
   *
   * @param {String} html - The text in HTML.
   * @param {String} [pagelink] - The article path for relative links; links are only converted when set.
   * @param {Boolean[]} [escapeArgs] - Arguments for the escaping of text formatting, forwarded to escapeFormatting.
   * @returns {String}
   */
  function htmlToDiscord(html, pagelink = '', ...escapeArgs) {
    // Markers written for both the opening and the closing tag.
    const inlineMarkup = new Map([
      ['b', '**'],
      ['i', '*'],
      ['s', '~~'],
      ['u', '__'],
    ]);
    // [opening, closing] markers; every heading starts on a new line.
    const headingMarkup = new Map([
      ['h1', ['***__', '__***']],
      ['h2', ['**__', '__**']],
      ['h3', ['**', '**']],
      ['h4', ['__', '__']],
      ['h5', ['*', '*']],
      ['h6', ['', '']],
    ]);

    let text = '';
    // Escaped target of the link currently being written, '' when none.
    let href = '';
    // Element whose content is skipped, plus the number of open elements of that name.
    let ignoredTag = '';
    let ignoredDepth = 0;
    // Element ('code' or 'pre') whose content is copied verbatim, plus its nesting depth.
    let codeTag = '';
    let codeDepth = 0;
    let syntaxhighlight = '';
    let listlevel = -1;

    /** Drops trailing spaces and makes sure the output ends with a line break. */
    const startNewLine = () => {
      text = text.replace(/ +$/, '');
      if (!text.endsWith('\n')) text += '\n';
    };

    /** Builds an indentation of zero-width-space/space pairs. */
    const indent = (width) => '\u200b '.repeat(width);

    /** Adds the alt text of an image unless it only repeats the file name. */
    const appendImageAlt = (attribs) => {
      const alt = attribs.alt;
      if (!alt || !attribs.src) return;
      if (attribs['data-image-name'] === alt) return;
      // Lazy-loaded images keep the real address in data-src.
      const src = (attribs.src.startsWith('data:') && attribs['data-src']) ? attribs['data-src'] : attribs.src;
      const fileInPath = new RegExp('/([\\da-f])/\\1[\\da-f]/' + alt.replace(/ /g, '_').replace(/\W/g, '\\$&') + '(?:/|\\?|$)');
      if (fileInPath.test(src.replace(/(?:%[\dA-F]{2})+/g, partialURIdecode))) return;
      const shown = href ? alt.replace(/[\[\]]/g, '\\$&') : alt;
      text += escapeFormatting(shown, ...escapeArgs);
    };

    /** Starts a markdown link for an anchor that points to an existing target. */
    const openLink = (attribs) => {
      if (!pagelink) return;
      if (!attribs.href || attribs.class === 'new') return;
      if (!/^(?:(?:https?:)?\/\/|\/|#)/.test(attribs.href)) return;
      let target;
      try {
        target = new URL(attribs.href, pagelink).href;
      }
      catch {
        // An unparsable address must not abort the conversion; keep the text unlinked.
        return;
      }
      href = target.replace(/[()]/g, '\\$&');
      // Merge with a directly preceding link to the same target.
      if (text.endsWith('](<' + href + '>)')) {
        text = text.substring(0, text.length - (href.length + 5));
      }
      else text += '[';
    };

    const parser = new htmlparser.Parser({
      onopentag: (tagname, attribs) => {
        if (ignoredTag) {
          if (tagname === ignoredTag) ignoredDepth++;
          return;
        }
        if (codeTag) {
          if (tagname === codeTag) codeDepth++;
          return;
        }
        const isReference = tagname === 'sup' && attribs.class === 'reference';
        const isTooltip = tagname === 'span' && attribs.class === 'smwttcontent';
        if (isReference || isTooltip) {
          ignoredTag = tagname;
          ignoredDepth = 1;
          return;
        }
        if (inlineMarkup.has(tagname)) {
          text += inlineMarkup.get(tagname);
          return;
        }
        if (headingMarkup.has(tagname)) {
          startNewLine();
          text += headingMarkup.get(tagname)[0];
          return;
        }
        switch (tagname) {
          case 'code':
            codeTag = 'code';
            codeDepth = 1;
            text += '`';
            break;
          case 'pre':
            codeTag = 'pre';
            codeDepth = 1;
            text += '```' + syntaxhighlight + '\n';
            break;
          case 'div':
            if (attribs.class) {
              const classes = attribs.class.split(' ');
              if (classes.includes('mw-highlight')) {
                syntaxhighlight = (classes.find((syntax) => syntax.startsWith('mw-highlight-lang-'))?.replace('mw-highlight-lang-', '') || '');
              }
            }
            break;
          case 'br':
            text += '\n';
            if (listlevel > -1) text += indent(4 * listlevel + 3);
            break;
          case 'hr':
            startNewLine();
            text += '─'.repeat(10) + '\n';
            break;
          case 'p':
            if (!text.endsWith('\n')) text += '\n';
            break;
          case 'ul':
          case 'ol':
          case 'dl':
            listlevel++;
            break;
          case 'li':
            startNewLine();
            if (attribs.class !== 'mw-empty-elt') {
              if (listlevel > -1) text += indent(4 * listlevel);
              text += '• ';
            }
            break;
          case 'dt':
            startNewLine();
            if (attribs.class !== 'mw-empty-elt') {
              if (listlevel > -1) text += indent(4 * listlevel);
              text += '**';
            }
            break;
          case 'dd':
            startNewLine();
            if (listlevel > -1 && attribs.class !== 'mw-empty-elt') text += indent(4 * (listlevel + 1));
            break;
          case 'img':
            appendImageAlt(attribs);
            break;
          case 'a':
            openLink(attribs);
            break;
        }
      },
      ontext: (htmltext) => {
        if (ignoredTag) return;
        if (codeTag) {
          // Backticks would end the code formatting early.
          text += htmltext.replace(/`/g, 'ˋ');
          return;
        }
        let content = htmltext;
        if (href) content = content.replace(/[\[\]]/g, '\\$&');
        content = content.replace(/[\r\n\t ]+/g, ' ');
        if (/[\n ]$/.test(text) && content.startsWith(' ')) {
          content = content.replace(/^ +/, '');
        }
        text += escapeFormatting(content, ...escapeArgs);
      },
      onclosetag: (tagname) => {
        if (ignoredTag) {
          // Nothing inside an ignored element may write output or change state.
          if (tagname === ignoredTag) {
            ignoredDepth--;
            if (ignoredDepth === 0) ignoredTag = '';
          }
          return;
        }
        if (codeTag) {
          // Only the element that started the code formatting ends it,
          // so <pre><code>…</code></pre> still gets its closing fence.
          if (tagname === codeTag) {
            codeDepth--;
            if (codeDepth === 0) {
              text += (codeTag === 'pre' ? '\n```' : '`');
              codeTag = '';
            }
          }
          return;
        }
        if (syntaxhighlight && tagname === 'div') syntaxhighlight = '';
        if (inlineMarkup.has(tagname)) {
          text += inlineMarkup.get(tagname);
          return;
        }
        if (headingMarkup.has(tagname)) {
          text += headingMarkup.get(tagname)[1];
          return;
        }
        switch (tagname) {
          case 'ul':
          case 'ol':
          case 'dl':
            listlevel--;
            break;
          case 'dt':
            text += '**';
            break;
          case 'a':
            if (!pagelink || !href) break;
            // A link without any text is removed again.
            if (text.endsWith('[')) text = text.substring(0, text.length - 1);
            else text += '](<' + href + '>)';
            href = '';
            break;
        }
      },
      oncomment: (commenttext) => {
        if (pagelink && /^(?:IW)?LINK'" \d+:\d+$/.test(commenttext)) {
          text += '*UNKNOWN LINK*';
        }
      },
    });
    parser.write(html);
    parser.end();
    return text;
  }
  /**
   * Escapes formatting.
   *
   * Plain text additionally gets its backslashes doubled and "](" broken up so
   * it cannot form a markdown link; "//" is broken up unless links are kept.
   *
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [isMarkdown] - The text contains markdown links.
   * @param {Boolean} [keepLinks] - Don't escape non-markdown links.
   * @returns {String}
   */
  function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
    let escaped = text;
    if (!isMarkdown) {
      escaped = escaped.replace(/\\/g, '\\\\');
      escaped = escaped.replace(/\]\(/g, ']\\(');
    }
    if (!keepLinks) {
      escaped = escaped.replace(/\/\//g, '/\\/');
    }
    return escaped.replace(/[`_*~:<>{}@|]/g, (symbol) => '\\' + symbol);
  }
  /**
   * Limit text length without breaking link formatting.
   *
   * Text within the limit is returned unchanged. Longer text is cut at the
   * limit and ends with an ellipsis. When a markdown link straddles the limit,
   * the text is cut in front of the link instead and the link is kept whole if
   * it ends before `limit + maxExtra`, reduced to its label if only that fits,
   * or dropped otherwise.
   *
   * @param {String} [text] - The text to modify.
   * @param {Number} [limit] - The character limit.
   * @param {Number} [maxExtra] - The maximal allowed character limit if needed.
   * @returns {String}
   */
  function limitLength(text = '', limit = 1000, maxExtra = 20) {
    // Nothing to shorten, so no ellipsis either.
    if (text.length <= limit) return text;

    const ellipsis = '\u2026';
    const linkPattern = /(?<!\\)\[((?:[^\[\]]|\\[\[\]])*?[^\\])\]\(<?(?:[^()]|\\[()])+?[^\\]>?\)/g;
    let cut = limit;
    let suffix = ellipsis;
    let link = null;
    while ((link = linkPattern.exec(text)) !== null) {
      const start = link.index;
      const end = start + link[0].length;
      // Links are found in order, so nothing after the limit can straddle it.
      if (start >= limit) break;
      if (end <= limit) continue;

      // The link straddles the limit: never cut inside of it.
      cut = start;
      let kept = '';
      if (end < limit + maxExtra) kept = link[0];
      else if (start + link[1].length < limit + maxExtra) kept = link[1];
      if (kept) {
        // Only mark an omission when text follows the link.
        suffix = (end < text.length ? kept + ellipsis : kept);
      }
      // Otherwise the link is dropped and the single default ellipsis stays.
      break;
    }
    return text.substring(0, cut) + suffix;
  }
  /**
   * Try to URI decode.
   *
   * Returns the input unchanged when it is not a valid encoded sequence; the
   * failure is only logged while `isDebug` is set.
   *
   * @param {String} m - The character to decode.
   * @returns {String}
   */
  function partialURIdecode(m) {
    try {
      return decodeURIComponent(m);
    }
    catch (decodeError) {
      if (isDebug) console.log('- Failed to decode ' + m + ':' + decodeError);
      return m;
    }
  }
  /**
   * Allow users to delete their command responses.
   *
   * Waits for a single wastebasket reaction by the given user (time option:
   * 300000) and deletes the message when one was collected.
   *
   * @param {import('discord.js').Message} msg - The response.
   * @param {String} author - The user id.
   */
  function allowDelete(msg, author) {
    const isDeleteRequest = (reaction, user) => reaction.emoji.name === '🗑️' && user.id === author;
    const collected = msg.awaitReactions(isDeleteRequest, {max: 1, time: 300000});
    collected.then((reactions) => {
      if (!reactions.size) return;
      msg.delete().catch(log_error);
    });
  }
  /**
   * Sends an interaction response.
   *
   * Patches the original interaction response through the webhook endpoint.
   * With a channel, the returned message is added to the channel's messages
   * and, unless disabled, the interaction user may delete it via allowDelete.
   * A failed request is passed to log_error.
   *
   * @param {Object} interaction - The interaction.
   * @param {Object} message - The message.
   * @param {String} message.content - The message content.
   * @param {{parse: String[], roles?: String[], users?: String[]}} message.allowed_mentions - The allowed mentions.
   * @param {import('discord.js').TextChannel} channel - The channel for the interaction.
   * @param {Boolean} [letDelete] - Let the interaction user delete the message.
   * @returns {Promise<import('discord.js').Message?>}
   */
  function sendMessage(interaction, message, channel, letDelete = true) {
    const original = interaction.client.api.webhooks(interaction.application_id, interaction.token).messages('@original');
    const onPatched = (msg) => {
      if (!channel) return;
      const responseMessage = channel.messages.add(msg);
      if (letDelete) {
        // Guild interactions carry a member, direct messages only a user.
        const authorId = (interaction.member?.user.id || interaction.user.id);
        allowDelete(responseMessage, authorId);
      }
      return responseMessage;
    };
    return original.patch({data: message}).then(onPatched, log_error);
  }
  449. module.exports = {
  450. got,
  451. slashCommands,
  452. oauthVerify,
  453. parse_infobox,
  454. toFormatting,
  455. toMarkdown,
  456. toPlaintext,
  457. htmlToPlain,
  458. htmlToDiscord,
  459. escapeFormatting,
  460. limitLength,
  461. partialURIdecode,
  462. allowDelete,
  463. sendMessage
  464. };