// functions.js (16 KB)
  1. const htmlparser = require('htmlparser2');
  /**
   * Shared HTTP client built with `got.extend`.
   * Configured for JSON responses, a 5000 ms timeout, `throwHttpErrors` disabled
   * and a User-Agent that names the bot, its version ('testing' when `isDebug`
   * is set) and the npm package name.
   */
  const got = require('got').extend( {
    responseType: 'json',
    timeout: 5000,
    throwHttpErrors: false,
    headers: {
      'User-Agent': `Wiki-Bot/${ isDebug ? 'testing' : process.env.npm_package_version } (Discord; ${ process.env.npm_package_name })`
    }
  } );
  /**
   * Parse infobox content into fields (and possibly a thumbnail) of an embed.
   *
   * The function recurses through nested infobox nodes. Only a version 2
   * root node (`parser_tag_version === 2`) returns the embed; every other
   * node type modifies the embed in place and returns nothing.
   * @param {Object} infobox - The content of the infobox.
   * @param {import('discord.js').MessageEmbed} embed - The message embed.
   * @param {String} [thumbnail] - The default thumbnail for the wiki.
   * @param {String} [serverpath] - The article path for relative links.
   * @returns {import('discord.js').MessageEmbed?}
   */
  function parse_infobox(infobox, embed, thumbnail, serverpath = '') {
    // Stop once the embed is full (presumably Discord's embed limits - confirm).
    if ( !infobox || embed.fields.length >= 25 || embed.length > 5400 ) return;

    // Header fields are stored with an invisible name and an underlined bold value.
    const isHeader = field => ( field.name === '\u200b' && field.value.startsWith( '__**' ) );
    // Shorten plain text to 100 characters plus an ellipsis.
    const truncate = plain => ( plain.length > 100 ? plain.substring(0, 100) + '\u2026' : plain );
    // Recurse into child nodes; anything that is not a list is ignored.
    const parseChildren = children => {
      if ( !Array.isArray(children) ) return;
      children.forEach( child => parse_infobox(child, embed, thumbnail, serverpath) );
    };

    if ( infobox.parser_tag_version === 2 ) {
      parseChildren(infobox.data);
      // Drop headers that are followed by another header or by nothing at all.
      embed.fields = embed.fields.filter( (field, i, fields) => {
        if ( !isHeader(field) ) return true;
        const next = fields[i + 1];
        return ( next?.name && !isHeader(next) );
      } );
      return embed;
    }

    switch ( infobox.type ) {
      case 'data': {
        const {label: rawLabel = '', value: rawValue = '', source = '', 'item-name': name = ''} = infobox.data;
        let label = htmlToPlain(rawLabel).trim();
        let value = htmlToDiscord(rawValue, serverpath, true).trim();
        // Unresolved links are replaced by the raw source name and flagged on the embed.
        if ( label.includes( '*UNKNOWN LINK*' ) ) {
          label = '`' + ( source || name ) + '`';
          embed.brokenInfobox = true;
        }
        if ( value.includes( '*UNKNOWN LINK*' ) ) {
          value = '`' + ( source || name ) + '`';
          embed.brokenInfobox = true;
        }
        label = truncate(label);
        if ( value.length > 500 ) value = limitLength(value, 500, 250);
        if ( label && value ) embed.addField( label, value, true );
        break;
      }
      case 'panel': {
        // Only fields added by this panel (index >= embedLength) are cleaned up.
        const embedLength = embed.fields.length;
        parseChildren(infobox.data?.value);
        embed.fields = embed.fields.filter( (field, i, fields) => {
          // Keep a header only when a regular (named) field follows it.
          if ( i < embedLength || field.name !== '\u200b' ) return true;
          if ( !field.value.startsWith( '__**' ) ) return true;
          return ( fields[i + 1]?.name && fields[i + 1].name !== '\u200b' );
        } ).filter( (field, i, fields) => {
          // Keep a section label only when something other than a header follows it.
          if ( i < embedLength || field.name !== '\u200b' ) return true;
          if ( field.value.startsWith( '__**' ) ) return true;
          return ( fields[i + 1]?.name && !isHeader(fields[i + 1]) );
        } );
        break;
      }
      case 'section': {
        const {label: rawLabel = ''} = infobox.data;
        const label = truncate(htmlToPlain(rawLabel).trim());
        if ( label ) embed.addField( '\u200b', '**' + label + '**', false );
      }
      // falls through: a section also contains child nodes like a group
      case 'group':
        parseChildren(infobox.data?.value);
        break;
      case 'header': {
        const {value: rawValue = ''} = infobox.data;
        const value = truncate(htmlToPlain(rawValue).trim());
        if ( value ) embed.addField( '\u200b', '__**' + value + '**__', false );
        break;
      }
      case 'image': {
        // Only replace the thumbnail while it is still the wiki default.
        if ( embed.thumbnail?.url !== thumbnail ) return;
        if ( !Array.isArray(infobox.data) ) return;
        // The extension check ignores case so that e.g. "Foo.PNG" is accepted too.
        const image = infobox.data.find( img => {
          return ( /^(?:https?:)?\/\//.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/i.test(img.name) );
        } );
        if ( image ) embed.setThumbnail( image.url.replace( /^(?:https?:)?\/\//, 'https://' ) );
        break;
      }
    }
  }
  /**
   * Convert wikitext formatting for output, either as markdown or as plain text.
   * Delegates to `toMarkdown` when the text is shown in an embed and to
   * `toPlaintext` otherwise.
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [showEmbed] - If the text is used in an embed.
   * @param {import('./wiki.js')} [wiki] - The wiki.
   * @param {String} [title] - The page title.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
    return ( showEmbed
      ? toMarkdown(text, wiki, title, fullWikitext)
      : toPlaintext(text, fullWikitext) );
  }
  /**
   * Turns wikitext formatting into markdown.
   *
   * Wiki links (`[[target|label]]`), section markers (`/* section *\/`, only when
   * a page title is given) and, for full wikitext, external links
   * (`[https://url label]`) are turned into markdown links.
   * NOTE(review): relies on `String.prototype.replaceSave`, which is not defined
   * in this part of the file - presumably a prototype extension set up elsewhere.
   * @param {String} [text] - The text to modify.
   * @param {import('./wiki.js')} wiki - The wiki.
   * @param {String} [title] - The page title.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
    // Escape characters that would otherwise end a markdown link early.
    text = text.replace( /[()\\]/g, '\\$&' );
    let match = null;

    const wikiLink = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
    while ( ( match = wikiLink.exec(text) ) !== null ) {
      const [whole, target, label, trail] = match;
      const pagetitle = ( target || label );
      // "#section" and "/subpage" links are relative to the current page.
      let linkTitle = pagetitle;
      if ( /^[#\/]/.test(pagetitle) ) {
        linkTitle = title + ( pagetitle.startsWith( '/' ) ? pagetitle : '' );
      }
      let fragment = '';
      if ( pagetitle.startsWith( '#' ) ) fragment = pagetitle.substring(1);
      const page = wiki.toLink(linkTitle, '', fragment, true);
      text = text.replaceSave( whole, '[' + label + trail + '](' + page + ')' );
    }

    if ( title !== '' ) {
      const sectionMarker = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
      while ( ( match = sectionMarker.exec(text) ) !== null ) {
        const [whole, section, following] = match;
        const sectionLink = '[→' + section + '](' + wiki.toLink(title, '', section, true) + ')';
        text = text.replaceSave( whole, sectionLink + ( following ? ': ' + following : '' ) );
      }
    }

    if ( !fullWikitext ) return escapeFormatting(text, true);

    const externalLink = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
    while ( ( match = externalLink.exec(text) ) !== null ) {
      const [whole, address, label] = match;
      text = text.replaceSave( whole, '[' + label + '](https://' + address + ')' );
    }
    return htmlToDiscord( text, '', true, true ).replaceSave( /'''/g, '**' ).replaceSave( /''/g, '*' );
  }
  /**
   * Removes wikitext formatting.
   *
   * Wiki links are reduced to their label and section markers become
   * "→section:". Full wikitext additionally has external links reduced to
   * their label and is run through `htmlToPlain`; other text is only escaped.
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toPlaintext(text = '', fullWikitext = false) {
    const withoutWikiLinks = text.replace( /\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]/g, '$1' );
    const plain = withoutWikiLinks.replace( /\/\*\s*([^\*]+?)\s*\*\//g, '→$1:' );
    if ( !fullWikitext ) return escapeFormatting(plain);
    return htmlToPlain( plain.replace( /\[(?:https?:)?\/\/(?:[^ ]+) ([^\]]+)\]/g, '$1' ) );
  }
  /**
   * Change HTML text to plain text.
   *
   * Text nodes are escaped with `escapeFormatting`. Line breaks, rules,
   * paragraphs, lists and definition lists are laid out with newlines and
   * zero-width-space indentation, headings are wrapped in emphasis markers,
   * and the content of `<sup class="reference">` is dropped.
   * @param {String} html - The text in HTML.
   * @returns {String}
   */
  function htmlToPlain(html) {
    // Opening and closing marker for each heading level.
    const headingMarkers = new Map( [
      ['h1', ['***__', '__***']],
      ['h2', ['**__', '__**']],
      ['h3', ['**', '**']],
      ['h4', ['__', '__']],
      ['h5', ['*', '*']],
      ['h6', ['', '']]
    ] );
    let text = '';
    let reference = false;
    let listlevel = -1;

    // Make sure the following output starts on a fresh line.
    const startLine = () => {
      text = text.replace( / +$/, '' );
      if ( !text.endsWith( '\n' ) ) text += '\n';
    };
    // Indent with zero-width-space/space pairs, but only inside a list.
    const indent = width => {
      if ( listlevel > -1 ) text += '\u200b '.repeat(width);
    };

    const parser = new htmlparser.Parser( {
      onopentag: (tagname, attribs) => {
        switch ( tagname ) {
          case 'sup':
            if ( attribs.class === 'reference' ) reference = true;
            break;
          case 'br':
            text += '\n';
            indent(4 * listlevel + 3);
            break;
          case 'hr':
            // Trailing spaces are trimmed like for every other block-level tag.
            startLine();
            text += '─'.repeat(10) + '\n';
            break;
          case 'p':
            if ( !text.endsWith( '\n' ) ) text += '\n';
            break;
          case 'ul':
          case 'dl':
            listlevel++;
            break;
          case 'li':
            startLine();
            indent(4 * listlevel);
            text += '• ';
            break;
          case 'dt':
            startLine();
            indent(4 * listlevel);
            break;
          case 'dd':
            startLine();
            indent(4 * (listlevel + 1));
            break;
          case 'img':
            if ( attribs.alt && attribs.src && !reference ) {
              // Skip the alt text when it only repeats the file name from a
              // hashed path such as ".../a/ab/File_name.png".
              const fileName = new RegExp( '/([\\da-f])/\\1[\\da-f]/' + attribs.alt.replace( / /g, '_' ).replace( /\W/g, '\\$&' ) + '(?:/|\\?|$)' );
              if ( !fileName.test(partialURIdecode(attribs.src)) ) text += escapeFormatting(attribs.alt);
            }
            break;
          default: {
            const markers = headingMarkers.get(tagname);
            if ( markers ) {
              startLine();
              text += markers[0];
            }
          }
        }
      },
      ontext: (htmltext) => {
        if ( !reference ) text += escapeFormatting(htmltext);
      },
      onclosetag: (tagname) => {
        if ( tagname === 'sup' ) reference = false;
        if ( tagname === 'ul' || tagname === 'dl' ) listlevel--;
        const markers = headingMarkers.get(tagname);
        if ( markers ) text += markers[1];
      },
      oncomment: (commenttext) => {
        // Comments of this shape are rendered as an unknown link marker
        // (presumably left behind by the wiki parser - confirm).
        if ( /^LINK'" \d+:\d+$/.test(commenttext) ) text += '*UNKNOWN LINK*';
      }
    } );
    parser.write( html );
    parser.end();
    return text;
  }
  /**
   * Change HTML text to markdown text.
   *
   * Without a `serverpath` only `<code>`, `<b>`, `<i>`, `<s>` and `<u>` are
   * converted. With a `serverpath`, line breaks, rules, paragraphs, lists,
   * definition lists, headings, images and links are handled as well and the
   * content of `<sup class="reference">` is dropped. Inside `<code>` all
   * other tags are ignored and backticks are replaced.
   * @param {String} html - The text in HTML.
   * @param {String} [serverpath] - The article path for relative links.
   * @param {Boolean[]} [escapeArgs] - Arguments for the escaping of text formatting.
   * @returns {String}
   */
  function htmlToDiscord(html, serverpath = '', ...escapeArgs) {
    // Markdown marker for simple inline formatting tags.
    const inlineMarkers = new Map( [
      ['b', '**'],
      ['i', '*'],
      ['s', '~~'],
      ['u', '__']
    ] );
    // Opening and closing marker for each heading level.
    const headingMarkers = new Map( [
      ['h1', ['***__', '__***']],
      ['h2', ['**__', '__**']],
      ['h3', ['**', '**']],
      ['h4', ['__', '__']],
      ['h5', ['*', '*']],
      ['h6', ['', '']]
    ] );
    let text = '';
    let code = false;
    let href = '';
    let reference = false;
    let listlevel = -1;

    // Make sure the following output starts on a fresh line.
    const startLine = () => {
      text = text.replace( / +$/, '' );
      if ( !text.endsWith( '\n' ) ) text += '\n';
    };
    // Indent with zero-width-space/space pairs, but only inside a list.
    const indent = width => {
      if ( listlevel > -1 ) text += '\u200b '.repeat(width);
    };

    const parser = new htmlparser.Parser( {
      onopentag: (tagname, attribs) => {
        if ( code ) return;
        if ( tagname === 'code' ) {
          code = true;
          text += '`';
        }
        if ( inlineMarkers.has(tagname) ) text += inlineMarkers.get(tagname);
        if ( !serverpath ) return;
        switch ( tagname ) {
          case 'sup':
            if ( attribs.class === 'reference' ) reference = true;
            break;
          case 'br':
            text += '\n';
            indent(4 * listlevel + 3);
            break;
          case 'hr':
            startLine();
            text += '─'.repeat(10) + '\n';
            break;
          case 'p':
            if ( !text.endsWith( '\n' ) ) text += '\n';
            break;
          case 'ul':
          case 'dl':
            listlevel++;
            break;
          case 'li':
            startLine();
            indent(4 * listlevel);
            text += '• ';
            break;
          case 'dt':
            startLine();
            indent(4 * listlevel);
            text += '**';
            break;
          case 'dd':
            startLine();
            indent(4 * (listlevel + 1));
            break;
          case 'img':
            if ( attribs.alt && attribs.src && !reference ) {
              // Skip the alt text when it only repeats the file name from a
              // hashed path such as ".../a/ab/File_name.png".
              const fileName = new RegExp( '/([\\da-f])/\\1[\\da-f]/' + attribs.alt.replace( / /g, '_' ).replace( /\W/g, '\\$&' ) + '(?:/|\\?|$)' );
              if ( !fileName.test(partialURIdecode(attribs.src)) ) {
                // `code` is always false here, open tags inside code return early.
                let alt = attribs.alt;
                if ( href ) alt = alt.replace( /[\[\]]/g, '\\$&' );
                text += escapeFormatting(alt, ...escapeArgs);
              }
            }
            break;
          case 'a':
            if ( attribs.href && attribs.class !== 'new' && /^(?:(?:https?:)?\/)?\//.test(attribs.href) ) {
              // A malformed href (or base) must not abort the whole conversion:
              // in that case the anchor content is kept as plain text.
              try {
                href = new URL(attribs.href, serverpath).href;
                text += '[';
              }
              catch ( urlError ) {
                if ( isDebug ) console.log( '- Failed to resolve ' + attribs.href + ':' + urlError );
              }
            }
            break;
          default: {
            const markers = headingMarkers.get(tagname);
            if ( markers ) {
              startLine();
              text += markers[0];
            }
          }
        }
      },
      ontext: (htmltext) => {
        if ( reference ) return;
        // Brackets inside a link label would break the markdown link.
        if ( href && !code ) htmltext = htmltext.replace( /[\[\]]/g, '\\$&' );
        if ( code ) text += htmltext.replace( /`/g, 'ˋ' );
        else text += escapeFormatting(htmltext, ...escapeArgs);
      },
      onclosetag: (tagname) => {
        if ( code ) {
          if ( tagname === 'code' ) {
            code = false;
            text += '`';
          }
          return;
        }
        if ( inlineMarkers.has(tagname) ) text += inlineMarkers.get(tagname);
        if ( !serverpath ) return;
        if ( tagname === 'sup' ) reference = false;
        if ( tagname === 'ul' || tagname === 'dl' ) listlevel--;
        if ( tagname === 'dt' ) text += '**';
        const markers = headingMarkers.get(tagname);
        if ( markers ) text += markers[1];
        if ( tagname === 'a' && href ) {
          // Drop the opening bracket again when the link had no content.
          if ( text.endsWith( '[' ) ) text = text.substring(0, text.length - 1);
          else text += '](<' + href.replace( /[()]/g, '\\$&' ) + '>)';
          href = '';
        }
      },
      oncomment: (commenttext) => {
        // Comments of this shape are rendered as an unknown link marker
        // (presumably left behind by the wiki parser - confirm).
        if ( serverpath && /^LINK'" \d+:\d+$/.test(commenttext) ) {
          text += '*UNKNOWN LINK*';
        }
      }
    } );
    parser.write( html );
    parser.end();
    return text;
  }
  /**
   * Escapes formatting by putting a backslash in front of special characters.
   *
   * Parentheses and backslashes are only escaped for text without markdown
   * links, double slashes only when links are not kept, and the remaining
   * formatting characters always.
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [isMarkdown] - The text contains markdown links.
   * @param {Boolean} [keepLinks] - Don't escape non-markdown links.
   * @returns {String}
   */
  function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
    // The passes run in this order, each prefixing its matches with a backslash.
    const passes = [];
    if ( !isMarkdown ) passes.push( /[()\\]/g );
    if ( !keepLinks ) passes.push( /\/\//g );
    passes.push( /[`_*~:<>{}@|]/g );
    return passes.reduce( (escaped, pattern) => escaped.replace( pattern, '\\$&' ), text );
  }
  /**
   * Limit text length without breaking link formatting.
   *
   * Text that already fits is returned unchanged. Otherwise the text is cut at
   * `limit` and an ellipsis is appended. When a markdown link `[label](target)`
   * straddles the limit, the text is cut in front of the link instead and
   * - the whole link is kept if it ends before `limit + maxExtra`,
   * - else only its label is kept if that ends before `limit + maxExtra`,
   * - else the link is dropped.
   * An ellipsis follows a kept link or label only when more text comes after it.
   * @param {String} [text] - The text to modify.
   * @param {Number} [limit] - The character limit.
   * @param {Number} [maxExtra] - The maximal number of characters allowed beyond the limit if needed.
   * @returns {String}
   */
  function limitLength(text = '', limit = 1000, maxExtra = 20) {
    if ( text.length <= limit ) return text;
    const ellipsis = '\u2026';
    // Measured from the requested limit, not from the start of the link.
    const budget = limit + maxExtra;
    const regex = /(?<!\\)\[((?:[^\[\]]|\\[\[\]])+?[^\\])\]\((?:[^()]|\\[()])+?[^\\]\)/g;
    let link = null;
    while ( ( link = regex.exec(text) ) !== null ) {
      if ( link.index >= limit ) break;
      const linkEnd = link.index + link[0].length;
      // Links that end at or before the limit are not affected by the cut.
      if ( linkEnd <= limit ) continue;
      const beforeLink = text.substring(0, link.index);
      let kept = '';
      if ( linkEnd < budget ) kept = link[0];
      else if ( link.index + link[1].length < budget ) kept = link[1];
      if ( !kept ) return beforeLink + ellipsis;
      return beforeLink + kept + ( linkEnd < text.length ? ellipsis : '' );
    }
    return text.substring(0, limit) + ellipsis;
  }
  /**
   * Try to URI decode.
   * Returns the input unchanged when `decodeURIComponent` throws; the failure
   * is logged when `isDebug` is set.
   * @param {String} m - The text to decode.
   * @returns {String}
   */
  function partialURIdecode(m) {
    try {
      return decodeURIComponent( m );
    }
    catch ( decodeError ) {
      if ( isDebug ) console.log( '- Failed to decode ' + m + ':' + decodeError );
      return m;
    }
  }
  442. module.exports = {
  443. got,
  444. parse_infobox,
  445. toFormatting,
  446. toMarkdown,
  447. toPlaintext,
  448. htmlToPlain,
  449. htmlToDiscord,
  450. escapeFormatting,
  451. limitLength,
  452. partialURIdecode
  453. };