functions.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. const htmlparser = require('htmlparser2');
  2. const got = require('got').extend( {
  3. throwHttpErrors: false,
  4. timeout: 5000,
  5. headers: {
  6. 'User-Agent': 'Wiki-Bot/' + ( isDebug ? 'testing' : process.env.npm_package_version ) + ' (Discord; ' + process.env.npm_package_name + ')'
  7. },
  8. responseType: 'json'
  9. } );
  10. /**
  11. * Parse infobox content
  12. * @param {Object} infobox - The content of the infobox.
  13. * @param {import('discord.js').MessageEmbed} embed - The message embed.
  14. * @param {String} [thumbnail] - The default thumbnail for the wiki.
  15. * @param {String} [pagelink] - The article path for relative links.
  16. * @returns {import('discord.js').MessageEmbed?}
  17. */
  18. function parse_infobox(infobox, embed, thumbnail, pagelink = '') {
  19. if ( !infobox || embed.fields.length >= 25 || embed.length > 5400 ) return;
  20. if ( infobox.parser_tag_version === 2 ) {
  21. infobox.data.forEach( group => {
  22. parse_infobox(group, embed, thumbnail, pagelink);
  23. } );
  24. embed.fields = embed.fields.filter( (field, i, fields) => {
  25. if ( field.name !== '\u200b' || !field.value.startsWith( '__**' ) ) return true;
  26. return ( fields[i + 1]?.name && ( fields[i + 1].name !== '\u200b' || !fields[i + 1].value.startsWith( '__**' ) ) );
  27. } );
  28. return embed;
  29. }
  30. switch ( infobox.type ) {
  31. case 'data':
  32. var {label = '', value = '', source = '', 'item-name': name = ''} = infobox.data;
  33. label = htmlToPlain(label).trim();
  34. value = htmlToDiscord(value, pagelink, true).trim();
  35. if ( label.includes( '*UNKNOWN LINK*' ) ) {
  36. if ( !( source || name ) ) break;
  37. label = '`' + ( source || name ) + '`';
  38. embed.brokenInfobox = true;
  39. }
  40. if ( value.includes( '*UNKNOWN LINK*' ) ) {
  41. if ( !( source || name ) ) break;
  42. value = '`' + ( source || name ) + '`';
  43. embed.brokenInfobox = true;
  44. }
  45. if ( label.length > 100 ) label = label.substring(0, 100) + '\u2026';
  46. if ( value.length > 500 ) value = limitLength(value, 500, 250);
  47. if ( label && value ) embed.addField( label, value, true );
  48. break;
  49. case 'panel':
  50. var embedLength = embed.fields.length;
  51. infobox.data.value.forEach( group => {
  52. parse_infobox(group, embed, thumbnail, pagelink);
  53. } );
  54. embed.fields = embed.fields.filter( (field, i, fields) => {
  55. if ( i < embedLength || field.name !== '\u200b' ) return true;
  56. if ( !field.value.startsWith( '__**' ) ) return true;
  57. return ( fields[i + 1]?.name && fields[i + 1].name !== '\u200b' );
  58. } ).filter( (field, i, fields) => {
  59. if ( i < embedLength || field.name !== '\u200b' ) return true;
  60. if ( field.value.startsWith( '__**' ) ) return true;
  61. return ( fields[i + 1]?.name && ( fields[i + 1].name !== '\u200b' || !fields[i + 1].value.startsWith( '__**' ) ) );
  62. } );
  63. break;
  64. case 'section':
  65. var {label = ''} = infobox.data;
  66. label = htmlToPlain(label).trim();
  67. if ( label.length > 100 ) label = label.substring(0, 100) + '\u2026';
  68. if ( label ) embed.addField( '\u200b', '**' + label + '**', false );
  69. case 'group':
  70. infobox.data.value.forEach( group => {
  71. parse_infobox(group, embed, thumbnail, pagelink);
  72. } );
  73. break;
  74. case 'header':
  75. var {value = ''} = infobox.data;
  76. value = htmlToPlain(value).trim();
  77. if ( value.length > 100 ) value = value.substring(0, 100) + '\u2026';
  78. if ( value ) embed.addField( '\u200b', '__**' + value + '**__', false );
  79. break;
  80. case 'image':
  81. if ( embed.thumbnail?.url !== thumbnail ) return;
  82. var image = infobox.data.find( img => {
  83. return ( /^(?:https?:)?\/\//.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/.test(img.name) );
  84. } );
  85. if ( image ) embed.setThumbnail( image.url.replace( /^(?:https?:)?\/\//, 'https://' ) );
  86. break;
  87. }
  88. }
  89. /**
  90. * Make wikitext formatting usage.
  91. * @param {String} [text] - The text to modify.
  92. * @param {Boolean} [showEmbed] - If the text is used in an embed.
  93. * @param {import('./wiki.js')} [wiki] - The wiki.
  94. * @param {String} [title] - The page title.
  95. * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  96. * @returns {String}
  97. */
  98. function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
  99. if ( showEmbed ) return toMarkdown(text, wiki, title, fullWikitext);
  100. else return toPlaintext(text, fullWikitext);
  101. };
  102. /**
  103. * Turns wikitext formatting into markdown.
  104. * @param {String} [text] - The text to modify.
  105. * @param {import('./wiki.js')} wiki - The wiki.
  106. * @param {String} [title] - The page title.
  107. * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  108. * @returns {String}
  109. */
  110. function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
  111. text = text.replace( /[()\\]/g, '\\$&' );
  112. var link = null;
  113. var regex = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
  114. while ( ( link = regex.exec(text) ) !== null ) {
  115. var pagetitle = ( link[1] || link[2] );
  116. var page = wiki.toLink(( /^[#\/]/.test(pagetitle) ? title + ( pagetitle.startsWith( '/' ) ? pagetitle : '' ) : pagetitle ), '', ( pagetitle.startsWith( '#' ) ? pagetitle.substring(1) : '' ), true);
  117. text = text.replaceSave( link[0], '[' + link[2] + link[3] + '](' + page + ')' );
  118. }
  119. if ( title !== '' ) {
  120. regex = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
  121. while ( ( link = regex.exec(text) ) !== null ) {
  122. text = text.replaceSave( link[0], '[→' + link[1] + '](' + wiki.toLink(title, '', link[1], true) + ')' + ( link[2] ? ': ' + link[2] : '' ) );
  123. }
  124. }
  125. if ( fullWikitext ) {
  126. regex = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
  127. while ( ( link = regex.exec(text) ) !== null ) {
  128. text = text.replaceSave( link[0], '[' + link[2] + '](https://' + link[1] + ')' );
  129. }
  130. return htmlToDiscord( text, '', true, true ).replaceSave( /'''/g, '**' ).replaceSave( /''/g, '*' );
  131. }
  132. return escapeFormatting(text, true);
  133. };
  134. /**
  135. * Removes wikitext formatting.
  136. * @param {String} [text] - The text to modify.
  137. * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  138. * @returns {String}
  139. */
  140. function toPlaintext(text = '', fullWikitext = false) {
  141. text = text.replace( /\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]/g, '$1' ).replace( /\/\*\s*([^\*]+?)\s*\*\//g, '→$1:' );
  142. if ( fullWikitext ) {
  143. return htmlToDiscord( text.replace( /\[(?:https?:)?\/\/(?:[^ ]+) ([^\]]+)\]/g, '$1' ) );
  144. }
  145. else return escapeFormatting(text);
  146. };
  147. /**
  148. * Change HTML text to plain text.
  149. * @param {String} html - The text in HTML.
  150. * @returns {String}
  151. */
  152. function htmlToPlain(html) {
  153. var text = '';
  154. var ignoredTag = '';
  155. var parser = new htmlparser.Parser( {
  156. onopentag: (tagname, attribs) => {
  157. if ( tagname === 'sup' && attribs.class === 'reference' ) ignoredTag = 'sup';
  158. if ( tagname === 'span' && attribs.class === 'smwttcontent' ) ignoredTag = 'span';
  159. },
  160. ontext: (htmltext) => {
  161. if ( !ignoredTag ) text += escapeFormatting(htmltext);
  162. },
  163. onclosetag: (tagname) => {
  164. if ( tagname === ignoredTag ) ignoredTag = '';
  165. }
  166. } );
  167. parser.write( html );
  168. parser.end();
  169. return text;
  170. };
  171. /**
  172. * Change HTML text to markdown text.
  173. * @param {String} html - The text in HTML.
  174. * @param {String} [pagelink] - The article path for relative links.
  175. * @param {Boolean[]} [escapeArgs] - Arguments for the escaping of text formatting.
  176. * @returns {String}
  177. */
  178. function htmlToDiscord(html, pagelink = '', ...escapeArgs) {
  179. var text = '';
  180. var code = false;
  181. var href = '';
  182. var ignoredTag = '';
  183. var listlevel = -1;
  184. var parser = new htmlparser.Parser( {
  185. onopentag: (tagname, attribs) => {
  186. if ( ignoredTag || code ) return;
  187. if ( tagname === 'sup' && attribs.class === 'reference' ) ignoredTag = 'sup';
  188. if ( tagname === 'span' && attribs.class === 'smwttcontent' ) ignoredTag = 'span';
  189. if ( tagname === 'code' ) {
  190. code = true;
  191. text += '`';
  192. }
  193. if ( tagname === 'pre' ) {
  194. code = true;
  195. text += '```\n';
  196. }
  197. if ( tagname === 'b' ) text += '**';
  198. if ( tagname === 'i' ) text += '*';
  199. if ( tagname === 's' ) text += '~~';
  200. if ( tagname === 'u' ) text += '__';
  201. if ( tagname === 'br' ) {
  202. text += '\n';
  203. if ( listlevel > -1 ) text += '\u200b '.repeat(4 * listlevel + 3);
  204. }
  205. if ( tagname === 'hr' ) {
  206. text = text.replace( / +$/, '' );
  207. if ( !text.endsWith( '\n' ) ) text += '\n';
  208. text += '─'.repeat(10) + '\n';
  209. }
  210. if ( tagname === 'p' && !text.endsWith( '\n' ) ) text += '\n';
  211. if ( tagname === 'ul' ) listlevel++;
  212. if ( tagname === 'li' ) {
  213. text = text.replace( / +$/, '' );
  214. if ( !text.endsWith( '\n' ) ) text += '\n';
  215. if ( attribs.class !== 'mw-empty-elt' ) {
  216. if ( listlevel > -1 ) text += '\u200b '.repeat(4 * listlevel);
  217. text += '• ';
  218. }
  219. }
  220. if ( tagname === 'dl' ) listlevel++;
  221. if ( tagname === 'dt' ) {
  222. text = text.replace( / +$/, '' );
  223. if ( !text.endsWith( '\n' ) ) text += '\n';
  224. if ( attribs.class !== 'mw-empty-elt' ) {
  225. if ( listlevel > -1 ) text += '\u200b '.repeat(4 * listlevel);
  226. text += '**';
  227. }
  228. }
  229. if ( tagname === 'dd' ) {
  230. text = text.replace( / +$/, '' );
  231. if ( !text.endsWith( '\n' ) ) text += '\n';
  232. if ( listlevel > -1 && attribs.class !== 'mw-empty-elt' ) text += '\u200b '.repeat(4 * (listlevel + 1));
  233. }
  234. if ( tagname === 'img' ) {
  235. if ( attribs.alt && attribs.src ) {
  236. let showAlt = true;
  237. if ( attribs['data-image-name'] === attribs.alt ) showAlt = false;
  238. else {
  239. let regex = new RegExp( '/([\\da-f])/\\1[\\da-f]/' + attribs.alt.replace( / /g, '_' ).replace( /\W/g, '\\$&' ) + '(?:/|\\?|$)' );
  240. if ( attribs.src.startsWith( 'data:' ) && attribs['data-src'] ) attribs.src = attribs['data-src'];
  241. if ( regex.test(attribs.src.replace( /(?:%[\dA-F]{2})+/g, partialURIdecode )) ) showAlt = false;
  242. }
  243. if ( showAlt ) {
  244. if ( href && !code ) attribs.alt = attribs.alt.replace( /[\[\]]/g, '\\$&' );
  245. if ( code ) text += attribs.alt.replace( /`/g, 'ˋ' );
  246. else text += escapeFormatting(attribs.alt, ...escapeArgs);
  247. }
  248. }
  249. }
  250. if ( tagname === 'h1' ) {
  251. text = text.replace( / +$/, '' );
  252. if ( !text.endsWith( '\n' ) ) text += '\n';
  253. text += '***__';
  254. }
  255. if ( tagname === 'h2' ) {
  256. text = text.replace( / +$/, '' );
  257. if ( !text.endsWith( '\n' ) ) text += '\n';
  258. text += '**__';
  259. }
  260. if ( tagname === 'h3' ) {
  261. text = text.replace( / +$/, '' );
  262. if ( !text.endsWith( '\n' ) ) text += '\n';
  263. text += '**';
  264. }
  265. if ( tagname === 'h4' ) {
  266. text = text.replace( / +$/, '' );
  267. if ( !text.endsWith( '\n' ) ) text += '\n';
  268. text += '__';
  269. }
  270. if ( tagname === 'h5' ) {
  271. text = text.replace( / +$/, '' );
  272. if ( !text.endsWith( '\n' ) ) text += '\n';
  273. text += '*';
  274. }
  275. if ( tagname === 'h6' ) {
  276. text = text.replace( / +$/, '' );
  277. if ( !text.endsWith( '\n' ) ) text += '\n';
  278. text += '';
  279. }
  280. if ( !pagelink ) return;
  281. if ( tagname === 'a' && attribs.href && attribs.class !== 'new' && /^(?:(?:https?:)?\/\/|\/|#)/.test(attribs.href) ) {
  282. href = new URL(attribs.href, pagelink).href;
  283. if ( text.endsWith( '](<' + href.replace( /[()]/g, '\\$&' ) + '>)' ) ) {
  284. text = text.substring(0, text.length - ( href.replace( /[()]/g, '\\$&' ).length + 5 ));
  285. }
  286. else text += '[';
  287. }
  288. },
  289. ontext: (htmltext) => {
  290. if ( !ignoredTag ) {
  291. if ( href && !code ) htmltext = htmltext.replace( /[\[\]]/g, '\\$&' );
  292. if ( code ) text += htmltext.replace( /`/g, 'ˋ' );
  293. else text += escapeFormatting(htmltext, ...escapeArgs);
  294. }
  295. },
  296. onclosetag: (tagname) => {
  297. if ( tagname === ignoredTag ) {
  298. ignoredTag = '';
  299. return;
  300. }
  301. if ( code ) {
  302. if ( tagname === 'code' ) {
  303. code = false;
  304. text += '`';
  305. }
  306. if ( tagname === 'pre' ) {
  307. code = false;
  308. text += '\n```';
  309. }
  310. return;
  311. }
  312. if ( tagname === 'b' ) text += '**';
  313. if ( tagname === 'i' ) text += '*';
  314. if ( tagname === 's' ) text += '~~';
  315. if ( tagname === 'u' ) text += '__';
  316. if ( tagname === 'ul' ) listlevel--;
  317. if ( tagname === 'dl' ) listlevel--;
  318. if ( tagname === 'dt' ) text += '**';
  319. if ( tagname === 'h1' ) text += '__***';
  320. if ( tagname === 'h2' ) text += '__**';
  321. if ( tagname === 'h3' ) text += '**';
  322. if ( tagname === 'h4' ) text += '__';
  323. if ( tagname === 'h5' ) text += '*';
  324. if ( tagname === 'h6' ) text += '';
  325. if ( !pagelink ) return;
  326. if ( tagname === 'a' && href ) {
  327. if ( text.endsWith( '[' ) ) text = text.substring(0, text.length - 1);
  328. else text += '](<' + href.replace( /[()]/g, '\\$&' ) + '>)';
  329. href = '';
  330. }
  331. },
  332. oncomment: (commenttext) => {
  333. if ( pagelink && /^LINK'" \d+:\d+$/.test(commenttext) ) {
  334. text += '*UNKNOWN LINK*';
  335. }
  336. }
  337. } );
  338. parser.write( html );
  339. parser.end();
  340. return text;
  341. };
  342. /**
  343. * Escapes formatting.
  344. * @param {String} [text] - The text to modify.
  345. * @param {Boolean} [isMarkdown] - The text contains markdown links.
  346. * @param {Boolean} [keepLinks] - Don't escape non-markdown links.
  347. * @returns {String}
  348. */
  349. function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
  350. if ( !isMarkdown ) text = text.replace( /[()\\]/g, '\\$&' );
  351. if ( !keepLinks ) text = text.replace( /\/\//g, '\\$&' );
  352. return text.replace( /[`_*~:<>{}@|]/g, '\\$&' );
  353. };
  354. /**
  355. * Limit text length without breaking link formatting.
  356. * @param {String} [text] - The text to modify.
  357. * @param {Number} [limit] - The character limit.
  358. * @param {Number} [maxExtra] - The maximal allowed character limit if needed.
  359. * @returns {String}
  360. */
  361. function limitLength(text = '', limit = 1000, maxExtra = 20) {
  362. var suffix = '\u2026';
  363. var link = null;
  364. var regex = /(?<!\\)\[((?:[^\[\]]|\\[\[\]])*?[^\\])\]\(<?(?:[^()]|\\[()])+?[^\\]>?\)/g;
  365. while ( ( link = regex.exec(text) ) !== null ) {
  366. if ( link.index < limit && link.index + link[0].length > limit ) {
  367. if ( link.index + link[0].length < limit + maxExtra ) suffix = link[0];
  368. else if ( link.index + link[1].length < limit + maxExtra ) suffix = link[1];
  369. if ( link.index + link[0].length < text.length ) suffix += '\u2026';
  370. limit = link.index;
  371. break;
  372. }
  373. else if ( link.index >= limit ) break;
  374. }
  375. return text.substring(0, limit) + suffix;
  376. };
  377. /**
  378. * Try to URI decode.
  379. * @param {String} m - The character to decode.
  380. * @returns {String}
  381. */
  382. function partialURIdecode(m) {
  383. var text = '';
  384. try {
  385. text = decodeURIComponent( m );
  386. }
  387. catch ( replaceError ) {
  388. if ( isDebug ) console.log( '- Failed to decode ' + m + ':' + replaceError );
  389. text = m;
  390. }
  391. return text;
  392. };
  393. /**
  394. * Allow users to delete their command responses.
  395. * @param {import('discord.js').Message} msg - The response.
  396. * @param {String} author - The user id.
  397. */
  398. function allowDelete(msg, author) {
  399. msg.awaitReactions( (reaction, user) => reaction.emoji.name === '🗑️' && user.id === author, {max:1,time:300000} ).then( reaction => {
  400. if ( reaction.size ) {
  401. msg.delete().catch(log_error);
  402. }
  403. } );
  404. };
// Public interface of this module: the configured HTTP client plus the
// infobox, wikitext and HTML formatting helpers defined above.
module.exports = {
	got,
	parse_infobox,
	toFormatting,
	toMarkdown,
	toPlaintext,
	htmlToPlain,
	htmlToDiscord,
	escapeFormatting,
	limitLength,
	partialURIdecode,
	allowDelete
};