functions.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. import htmlparser from 'htmlparser2';
  2. import gotDefault from 'got';
  /**
   * Preconfigured HTTP client used for requests made by the bot.
   * HTTP error status codes do not throw, the request timeout is set to 5000
   * and response bodies are parsed as JSON.
   * NOTE(review): `isDebug` is not declared in this file - presumably a global set at startup; confirm.
   */
  const got = gotDefault.extend( {
    throwHttpErrors: false,
    timeout: {
      request: 5000
    },
    headers: {
      'User-Agent': `Wiki-Bot/${isDebug ? 'testing' : process.env.npm_package_version} (Discord; ${process.env.npm_package_name}${process.env.invite ? `; ${process.env.invite}` : ''})`
    },
    responseType: 'json'
  } );

  /**
   * Map of OAuth verification entries; it is only created here and is filled elsewhere.
   * @type {Map<String, {state: String, wiki: String, channel: import('discord.js').TextChannel, user: String}>}
   */
  const oauthVerify = new Map();
  /**
   * Parse infobox content and add it to the embed.
   *
   * Data items become inline fields, section labels and headers become
   * non-inline fields named with a zero-width space, and the first usable image
   * replaces the thumbnail as long as the embed still shows the default one.
   * Nothing is added once the embed has 25 fields or is longer than 5400 characters.
   * @param {Object} infobox - The content of the infobox.
   * @param {import('discord.js').MessageEmbed} embed - The message embed, modified in place.
   * @param {String} [thumbnail] - The default thumbnail for the wiki.
   * @param {String} [pagelink] - The article path for relative links.
   * @returns {import('discord.js').MessageEmbed?} The embed for a top-level infobox (`parser_tag_version` 5), otherwise nothing.
   */
  function parse_infobox(infobox, embed, thumbnail, pagelink = '') {
    if ( !infobox || embed.fields.length >= 25 || embed.length > 5400 ) return;
    // A header is the field added for the 'header' item type below.
    const isHeader = field => ( field.name === '\u200b' && field.value.startsWith( '__**' ) );
    if ( infobox.parser_tag_version === 5 ) {
      infobox.data.forEach( group => {
        parse_infobox(group, embed, thumbnail, pagelink);
      } );
      // Drop headers that are not followed by a named, non-header field.
      embed.fields = embed.fields.filter( (field, i, fields) => {
        if ( !isHeader(field) ) return true;
        const next = fields[i + 1];
        return ( next?.name && !isHeader(next) );
      } );
      return embed;
    }
    switch ( infobox.type ) {
      case 'data': {
        let {label = '', value = '', source = '', 'item-name': name = ''} = infobox.data;
        label = htmlToPlain(label, true).trim();
        value = htmlToDiscord(value, pagelink).trim();
        // Unresolved link markers make the text unusable: show the data source name instead, if any.
        if ( label.includes( '*UNKNOWN LINK*' ) ) {
          if ( !( source || name ) ) break;
          label = '`' + ( source || name ) + '`';
          embed.brokenInfobox = true;
        }
        if ( value.includes( '*UNKNOWN LINK*' ) ) {
          if ( !( source || name ) ) break;
          value = '`' + ( source || name ) + '`';
          embed.brokenInfobox = true;
        }
        if ( label.length > 100 ) label = label.substring(0, 100) + '\u2026';
        if ( value.length > 500 ) value = limitLength(value, 500, 250);
        if ( label && value ) embed.addField( label, value, true );
        break;
      }
      case 'panel': {
        // Only fields added by this panel (index >= embedLength) are candidates for removal.
        const embedLength = embed.fields.length;
        infobox.data.value.forEach( group => {
          parse_infobox(group, embed, thumbnail, pagelink);
        } );
        embed.fields = embed.fields.filter( (field, i, fields) => {
          // Drop new headers that are not directly followed by a regular field.
          if ( i < embedLength || !isHeader(field) ) return true;
          const next = fields[i + 1];
          return ( next?.name && next.name !== '\u200b' );
        } ).filter( (field, i, fields) => {
          // Drop new section labels that are not followed by a named, non-header field.
          if ( i < embedLength || field.name !== '\u200b' || field.value.startsWith( '__**' ) ) return true;
          const next = fields[i + 1];
          return ( next?.name && !isHeader(next) );
        } );
        break;
      }
      case 'section': {
        let {label = ''} = infobox.data;
        label = htmlToPlain(label).trim();
        if ( label.length > 100 ) label = label.substring(0, 100) + '\u2026';
        if ( label ) embed.addField( '\u200b', '**' + label + '**', false );
      }
      // falls through: a section contains items exactly like a group
      case 'group':
        infobox.data.value.forEach( group => {
          parse_infobox(group, embed, thumbnail, pagelink);
        } );
        break;
      case 'header': {
        let {value = ''} = infobox.data;
        value = htmlToPlain(value).trim();
        if ( value.length > 100 ) value = value.substring(0, 100) + '\u2026';
        if ( value ) embed.addField( '\u200b', '__**' + value + '**__', false );
        break;
      }
      case 'image': {
        // Only replace the wiki's default thumbnail, never one that was already customized.
        if ( embed.thumbnail?.url !== thumbnail ) return;
        // The extension check ignores case so files like "Photo.PNG" are accepted as well.
        const image = infobox.data.find( img => {
          return ( /^(?:https?:)?\/\//.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/i.test(img.name) );
        } );
        if ( image ) embed.setThumbnail( image.url.replace( /^(?:https?:)?\/\//, 'https://' ) );
        break;
      }
    }
  }
  /**
   * Format wikitext either as markdown (for embeds) or as plain text.
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [showEmbed] - If the text is used in an embed.
   * @param {import('./wiki.js').default} [wiki] - The wiki, only used for the markdown variant.
   * @param {String} [title] - The page title, only used for the markdown variant.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
    return showEmbed
      ? toMarkdown(text, wiki, title, fullWikitext)
      : toPlaintext(text, fullWikitext);
  }
  /**
   * Turns wikitext formatting into markdown.
   *
   * Internal links become markdown links built with `wiki.toLink`, section
   * comments become links to that section of `title`, and with `fullWikitext`
   * external links, HTML and bold/italic quotes are converted as well.
   * NOTE(review): `replaceSave` is a String method not defined in this file - presumably
   * a `replace` that takes the replacement literally; confirm against its definition.
   * @param {String} [text] - The text to modify.
   * @param {import('./wiki.js').default} wiki - The wiki.
   * @param {String} [title] - The page title.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
    // Escape parentheses and backslashes first so they cannot break the markdown links.
    let output = text.replace( /[()\\]/g, '\\$&' );
    let match = null;

    const internalLink = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
    while ( ( match = internalLink.exec(output) ) !== null ) {
      const [whole, target, label, trail] = match;
      const pagetitle = ( target || label );
      // Targets starting with "#" or "/" are relative to the current page.
      let linkTitle = pagetitle;
      if ( /^[#\/]/.test(pagetitle) ) {
        linkTitle = title + ( pagetitle.startsWith( '/' ) ? pagetitle : '' );
      }
      const fragment = ( pagetitle.startsWith( '#' ) ? pagetitle.substring(1) : '' );
      const page = wiki.toLink(linkTitle, '', fragment, true);
      output = output.replaceSave( whole, '[' + label + trail + '](' + page + ')' );
    }

    if ( title !== '' ) {
      const sectionComment = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
      while ( ( match = sectionComment.exec(output) ) !== null ) {
        const [whole, section, following] = match;
        const sectionLink = '[→' + section + '](' + wiki.toLink(title, '', section, true) + ')';
        output = output.replaceSave( whole, sectionLink + ( following ? ': ' + following : '' ) );
      }
    }

    if ( !fullWikitext ) return escapeFormatting(output, true);

    const externalLink = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
    while ( ( match = externalLink.exec(output) ) !== null ) {
      const [whole, address, label] = match;
      output = output.replaceSave( whole, '[' + label + '](https://' + address + ')' );
    }
    return htmlToDiscord(output, '', true, true).replaceSave( /'''/g, '**' ).replaceSave( /''/g, '*' );
  }
  /**
   * Removes wikitext formatting.
   *
   * Internal links are reduced to their label and section comments to "→section:".
   * With `fullWikitext` external links are reduced to their label too and the
   * result is passed through the HTML conversion, otherwise it is only escaped.
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toPlaintext(text = '', fullWikitext = false) {
    const withoutInternalLinks = text.replace( /\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]/g, '$1' );
    const withSectionArrows = withoutInternalLinks.replace( /\/\*\s*([^\*]+?)\s*\*\//g, '→$1:' );
    if ( !fullWikitext ) return escapeFormatting(withSectionArrows);
    const withoutExternalLinks = withSectionArrows.replace( /\[(?:https?:)?\/\/(?:[^ ]+) ([^\]]+)\]/g, '$1' );
    return htmlToDiscord( withoutExternalLinks );
  }
  /**
   * Change HTML text to plain text.
   *
   * Tags are dropped, `<br>` becomes a space, whitespace runs are collapsed and
   * the remaining text is escaped. The content of `<sup class="reference">` and
   * `<span class="smwttcontent">` elements is skipped completely, including
   * nested elements with the same tag name.
   * @param {String} html - The text in HTML.
   * @param {Boolean} [includeComments] - Add `*UNKNOWN LINK*` for link marker comments found in the HTML.
   * @returns {String}
   */
  function htmlToPlain(html, includeComments = false) {
    let text = '';
    let ignoredTag = '';
    // Number of nested elements with the same name as the ignored one that are still open.
    let ignoredDepth = 0;
    const parser = new htmlparser.Parser( {
      onopentag: (tagname, attribs) => {
        if ( ignoredTag ) {
          // Count nested same-name tags so their closing tag doesn't end the ignored region early.
          if ( tagname === ignoredTag ) ignoredDepth++;
          return;
        }
        if ( tagname === 'sup' && attribs.class === 'reference' ) ignoredTag = 'sup';
        if ( tagname === 'span' && attribs.class === 'smwttcontent' ) ignoredTag = 'span';
        if ( tagname === 'br' ) text += ' ';
      },
      ontext: (htmltext) => {
        if ( ignoredTag ) return;
        htmltext = htmltext.replace( /[\r\n\t ]+/g, ' ' );
        // Avoid doubled spaces across text nodes.
        if ( /[\n ]$/.test(text) && htmltext.startsWith( ' ' ) ) htmltext = htmltext.replace( /^ +/, '' );
        text += escapeFormatting(htmltext);
      },
      onclosetag: (tagname) => {
        if ( tagname !== ignoredTag ) return;
        if ( ignoredDepth > 0 ) ignoredDepth--;
        else ignoredTag = '';
      },
      oncomment: (commenttext) => {
        if ( includeComments && /^(?:IW)?LINK'" \d+(?::\d+)?$/.test(commenttext) ) {
          text += '*UNKNOWN LINK*';
        }
      }
    } );
    parser.write( String(html) );
    parser.end();
    return text;
  }
  /**
   * Change HTML text to markdown text.
   *
   * Converts inline formatting, headings, lists, code blocks and - when
   * `pagelink` is given - links into Discord markdown. The content of
   * `<sup class="reference">` and `<span class="smwttcontent">` elements is
   * skipped completely, and inside `<code>`/`<pre>` no other markup is interpreted.
   * @param {String} html - The text in HTML.
   * @param {String} [pagelink] - The article path for relative links; links are only created when set.
   * @param {Boolean[]} [escapeArgs] - Arguments for the escaping of text formatting.
   * @returns {String}
   */
  function htmlToDiscord(html, pagelink = '', ...escapeArgs) {
    // Opening and closing markdown for each heading level.
    const headings = new Map( [
      ['h1', ['***__', '__***']],
      ['h2', ['**__', '__**']],
      ['h3', ['**', '**']],
      ['h4', ['__', '__']],
      ['h5', ['*', '*']],
      ['h6', ['', '']]
    ] );
    let text = '';
    // Name of the tag that started code mode ('code' or 'pre'), empty outside of code.
    let code = '';
    let href = '';
    let ignoredTag = '';
    // Number of nested elements with the same name as the ignored one that are still open.
    let ignoredDepth = 0;
    let syntaxhighlight = '';
    let listlevel = -1;

    // Strip trailing spaces and make sure the following output starts on its own line.
    const startNewLine = () => {
      text = text.replace( / +$/, '' );
      if ( !text.endsWith( '\n' ) ) text += '\n';
    };
    // Indentation made of zero-width space + space pairs.
    const indent = (count) => '\u200b '.repeat(count);

    const parser = new htmlparser.Parser( {
      onopentag: (tagname, attribs) => {
        if ( ignoredTag ) {
          // Count nested same-name tags so their closing tag doesn't end the ignored region early.
          if ( tagname === ignoredTag ) ignoredDepth++;
          return;
        }
        if ( code ) return;
        if ( tagname === 'sup' && attribs.class === 'reference' ) ignoredTag = 'sup';
        if ( tagname === 'span' && attribs.class === 'smwttcontent' ) ignoredTag = 'span';
        if ( tagname === 'code' ) {
          code = 'code';
          text += '`';
        }
        if ( tagname === 'pre' ) {
          code = 'pre';
          text += '```' + syntaxhighlight + '\n';
        }
        if ( tagname === 'div' && attribs.class ) {
          let classes = attribs.class.split(' ');
          if ( classes.includes( 'mw-highlight' ) ) {
            syntaxhighlight = ( classes.find( syntax => syntax.startsWith( 'mw-highlight-lang-' ) )?.replace( 'mw-highlight-lang-', '' ) || '' );
          }
        }
        if ( tagname === 'b' || tagname === 'strong' ) text += '**';
        if ( tagname === 'i' ) text += '*';
        if ( tagname === 's' ) text += '~~';
        if ( tagname === 'u' ) text += '__';
        if ( tagname === 'br' ) {
          text += '\n';
          if ( listlevel > -1 ) text += indent(4 * listlevel + 3);
        }
        if ( tagname === 'hr' ) {
          startNewLine();
          text += '─'.repeat(10) + '\n';
        }
        if ( tagname === 'p' && !text.endsWith( '\n' ) ) text += '\n';
        if ( tagname === 'ul' || tagname === 'ol' || tagname === 'dl' ) listlevel++;
        if ( tagname === 'li' ) {
          startNewLine();
          if ( attribs.class !== 'mw-empty-elt' ) {
            if ( listlevel > -1 ) text += indent(4 * listlevel);
            text += '• ';
          }
        }
        if ( tagname === 'dt' ) {
          startNewLine();
          if ( attribs.class !== 'mw-empty-elt' ) {
            if ( listlevel > -1 ) text += indent(4 * listlevel);
            text += '**';
          }
        }
        if ( tagname === 'dd' ) {
          startNewLine();
          if ( listlevel > -1 && attribs.class !== 'mw-empty-elt' ) text += indent(4 * (listlevel + 1));
        }
        if ( tagname === 'img' && attribs.alt && attribs.src ) {
          // Show the alt text unless it is just the file name of the image.
          let showAlt = true;
          if ( attribs['data-image-name'] === attribs.alt ) showAlt = false;
          else {
            let regex = new RegExp( '/([\\da-f])/\\1[\\da-f]/' + attribs.alt.replace( / /g, '_' ).replace( /\W/g, '\\$&' ) + '(?:/|\\?|$)' );
            if ( attribs.src.startsWith( 'data:' ) && attribs['data-src'] ) attribs.src = attribs['data-src'];
            if ( regex.test(attribs.src.replace( /(?:%[\dA-F]{2})+/g, partialURIdecode )) ) showAlt = false;
          }
          if ( showAlt ) {
            // Code mode never reaches this point, so only the link case needs extra escaping.
            if ( href ) attribs.alt = attribs.alt.replace( /[\[\]]/g, '\\$&' );
            text += escapeFormatting(attribs.alt, ...escapeArgs);
          }
        }
        if ( headings.has(tagname) ) {
          startNewLine();
          text += headings.get(tagname)[0];
        }
        if ( !pagelink ) return;
        if ( tagname === 'a' && attribs.href && attribs.class !== 'new' && /^(?:(?:https?:)?\/\/|\/|#)/.test(attribs.href) ) {
          href = new URL(attribs.href, pagelink).href.replace( /[()]/g, '\\$&' );
          // Merge with a directly preceding link to the same target instead of starting a new one.
          if ( text.endsWith( '](<' + href + '>)' ) ) {
            text = text.substring(0, text.length - ( href.length + 5 ));
          }
          else text += '[';
        }
      },
      ontext: (htmltext) => {
        if ( ignoredTag ) return;
        if ( href && !code ) htmltext = htmltext.replace( /[\[\]]/g, '\\$&' );
        if ( code ) {
          // Backticks would end the code formatting, replace them with a similar character.
          text += htmltext.replace( /`/g, 'ˋ' );
          return;
        }
        htmltext = htmltext.replace( /[\r\n\t ]+/g, ' ' );
        if ( /[\n ]$/.test(text) && htmltext.startsWith( ' ' ) ) {
          htmltext = htmltext.replace( /^ +/, '' );
        }
        text += escapeFormatting(htmltext, ...escapeArgs);
      },
      onclosetag: (tagname) => {
        if ( ignoredTag ) {
          // Nothing inside an ignored element may produce output or change the list level.
          if ( tagname === ignoredTag ) {
            if ( ignoredDepth > 0 ) ignoredDepth--;
            else ignoredTag = '';
          }
          return;
        }
        if ( code ) {
          // Only the tag that started code mode ends it, e.g. </pre> for <pre><code>…</code></pre>.
          if ( tagname === code ) {
            text += ( code === 'pre' ? '\n```' : '`' );
            code = '';
          }
          return;
        }
        if ( syntaxhighlight && tagname === 'div' ) syntaxhighlight = '';
        if ( tagname === 'b' || tagname === 'strong' ) text += '**';
        if ( tagname === 'i' ) text += '*';
        if ( tagname === 's' ) text += '~~';
        if ( tagname === 'u' ) text += '__';
        if ( tagname === 'ul' || tagname === 'ol' || tagname === 'dl' ) listlevel--;
        if ( tagname === 'dt' ) text += '**';
        if ( headings.has(tagname) ) text += headings.get(tagname)[1];
        if ( !pagelink ) return;
        if ( tagname === 'a' && href ) {
          // Remove the opening bracket again if the link had no visible content.
          if ( text.endsWith( '[' ) ) text = text.substring(0, text.length - 1);
          else text += '](<' + href + '>)';
          href = '';
        }
      },
      oncomment: (commenttext) => {
        if ( pagelink && /^(?:IW)?LINK'" \d+(?::\d+)?$/.test(commenttext) ) {
          text += '*UNKNOWN LINK*';
        }
      }
    } );
    parser.write( String(html) );
    parser.end();
    return text;
  }
  /**
   * Escapes formatting.
   *
   * Always escapes the markdown and mention characters. Backslashes and `](`
   * sequences are only escaped when the text contains no markdown links, and
   * `//` is only broken up when links should not be kept.
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [isMarkdown] - The text contains markdown links.
   * @param {Boolean} [keepLinks] - Don't escape non-markdown links.
   * @returns {String}
   */
  function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
    let escaped = text;
    if ( !isMarkdown ) {
      escaped = escaped.replace( /\\/g, '\\\\' );
      escaped = escaped.replace( /\]\(/g, ']\\(' );
    }
    if ( !keepLinks ) {
      escaped = escaped.replace( /\/\//g, '/\\/' );
    }
    return escaped.replace( /[`_*~:<>{}@|]/g, '\\$&' );
  }
  /**
   * Limit text length without breaking link formatting.
   *
   * Text that already fits the limit is returned unchanged. Otherwise the text
   * is cut at the limit and an ellipsis is appended. If the cut would fall
   * inside a markdown link, the text is cut before the link instead and the
   * whole link, or only its label, is kept when it ends before `limit + maxExtra`.
   * @param {String} [text] - The text to modify.
   * @param {Number} [limit] - The character limit.
   * @param {Number} [maxExtra] - The maximal allowed character limit if needed.
   * @returns {String}
   */
  function limitLength(text = '', limit = 1000, maxExtra = 20) {
    const ellipsis = '\u2026';
    // Nothing to shorten, so don't mark the text as truncated.
    if ( text.length <= limit ) return text;
    const regex = /(?<!\\)\[((?:[^\[\]]|\\[\[\]])*?[^\\])\]\(<?(?:[^()]|\\[()])+?[^\\]>?\)/g;
    let link = null;
    while ( ( link = regex.exec(text) ) !== null ) {
      // Links starting at or after the limit are cut off anyway.
      if ( link.index >= limit ) break;
      const linkEnd = link.index + link[0].length;
      if ( linkEnd <= limit ) continue;
      // This link straddles the limit: cut before it and keep as much of it as allowed.
      let suffix = '';
      if ( linkEnd < limit + maxExtra ) suffix = link[0];
      else if ( link.index + link[1].length < limit + maxExtra ) suffix = link[1];
      // Exactly one ellipsis, and only if something after the kept part was dropped.
      if ( !suffix || linkEnd < text.length ) suffix += ellipsis;
      return text.substring(0, link.index) + suffix;
    }
    return text.substring(0, limit) + ellipsis;
  }
  /**
   * Try to URI decode.
   *
   * Returns the input unchanged when it can't be decoded; the failure is
   * logged to the console when `isDebug` is truthy.
   * @param {String} m - The character to decode.
   * @returns {String}
   */
  function partialURIdecode(m) {
    try {
      return decodeURIComponent( m );
    }
    catch ( replaceError ) {
      if ( isDebug ) console.log( '- Failed to decode ' + m + ':' + replaceError );
      return m;
    }
  }
  /**
   * Allow users to delete their command responses.
   *
   * Waits up to 300000 ms for a single 🗑️ reaction by the given user and then
   * deletes the message. Does nothing when there is no message or it doesn't
   * support `awaitReactions`. All failures are passed to `log_error` instead of
   * ending up as unhandled rejections.
   * @param {import('discord.js').Message} msg - The response.
   * @param {String} author - The user id.
   */
  function allowDelete(msg, author) {
    msg?.awaitReactions?.( {
      filter: (reaction, user) => ( reaction.emoji.name === '🗑️' && user.id === author ),
      max: 1, time: 300000
    } ).then( reaction => {
      // The collection is empty when the time ran out without a matching reaction.
      if ( reaction.size ) return msg.delete();
    } ).catch(log_error);
  }
  /**
   * Sends an interaction response.
   *
   * Edits the interaction reply and, unless disabled or the sent message has
   * flag bit 64 set, lets the interaction user delete it again. A failed edit
   * is passed to `log_error`, whose return value then resolves the promise.
   * @param {import('discord.js').CommandInteraction|import('discord.js').ButtonInteraction} interaction - The interaction.
   * @param {import('discord.js').MessageOptions} message - The message.
   * @param {Boolean} [letDelete] - Let the interaction user delete the message.
   * @returns {Promise<import('discord.js').Message?>}
   */
  function sendMessage(interaction, message, letDelete = true) {
    // An embed list whose first entry is empty is replaced by an empty list.
    const hasEmptyEmbed = ( message?.embeds?.length && !message.embeds[0] );
    if ( hasEmptyEmbed ) message.embeds = [];

    const afterSend = (msg) => {
      if ( !letDelete ) return msg;
      if ( (msg.flags & 64) !== 64 ) allowDelete(msg, interaction.user.id);
      return msg;
    };
    return interaction.editReply( message ).then( afterSend, log_error );
  }
  451. export {
  452. got,
  453. oauthVerify,
  454. parse_infobox,
  455. toFormatting,
  456. toMarkdown,
  457. toPlaintext,
  458. htmlToPlain,
  459. htmlToDiscord,
  460. escapeFormatting,
  461. limitLength,
  462. partialURIdecode,
  463. allowDelete,
  464. sendMessage
  465. };