functions.js 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. const htmlparser = require('htmlparser2');
  // Shared HTTP client built from got's defaults with project-wide options.
  // `isDebug` is not defined in this file — it is expected to be a global set elsewhere.
  const got = require('got').extend({
    // Parse response bodies as JSON.
    responseType: 'json',
    // Do not throw on HTTP error responses.
    throwHttpErrors: false,
    // Request timeout (milliseconds, per got's documentation).
    timeout: 5000,
    headers: {
      'User-Agent': `Wiki-Bot/${isDebug ? 'testing' : process.env.npm_package_version} (Discord; ${process.env.npm_package_name})`,
    },
  });
  /**
   * Adds the content of one infobox node to a message embed.
   *
   * Supported node types (`infobox.type`):
   * - `data`: `infobox.data` holds `label`, `value` (HTML) and `source`; added as an inline field.
   * - `group`: `infobox.data.value` is a list of child nodes, each handled recursively.
   * - `header`: `infobox.data.value` (HTML) is added as a bold, underlined, non-inline field.
   * - `image`: `infobox.data` is a list of `{url, name}`; the first usable one replaces the
   *   thumbnail, but only while the embed still shows the wiki's default thumbnail.
   * Any other type is ignored.
   *
   * @param {Object} infobox - The content of the infobox.
   * @param {import('discord.js').MessageEmbed} embed - The message embed (modified in place).
   * @param {String} [thumbnail] - The default thumbnail for the wiki.
   */
  function parse_infobox(infobox, embed, thumbnail) {
    // Stop once the embed already has 25 fields or is longer than 5500 characters
    // (presumably chosen to stay within Discord's embed limits).
    if ( embed.fields.length >= 25 || embed.length > 5500 ) return;

    switch ( infobox.type ) {
      case 'data': {
        const { label = '', value = '', source = '' } = infobox.data;
        let fieldName = htmlToPlain(label).trim();
        let fieldValue = htmlToPlain(value).trim();
        // An unresolved link marker means the text is unusable: show the raw source
        // name instead and flag the embed as coming from a broken infobox.
        if ( fieldName.includes( '*UNKNOWN LINK*' ) ) {
          fieldName = '`' + source + '`';
          embed.brokenInfobox = true;
        }
        if ( fieldValue.includes( '*UNKNOWN LINK*' ) ) {
          fieldValue = '`' + source + '`';
          embed.brokenInfobox = true;
        }
        if ( fieldName.length > 50 ) fieldName = fieldName.substring(0, 50) + '\u2026';
        if ( fieldValue.length > 250 ) fieldValue = fieldValue.substring(0, 250) + '\u2026';
        if ( fieldName && fieldValue ) embed.addField( fieldName, fieldValue, true );
        break;
      }
      case 'group': {
        infobox.data.value.forEach( child => {
          parse_infobox(child, embed, thumbnail);
        } );
        break;
      }
      case 'header': {
        const { value = '' } = infobox.data;
        let heading = htmlToPlain(value).trim();
        if ( heading.length > 100 ) heading = heading.substring(0, 100) + '\u2026';
        // Zero-width space as the field name so only the heading itself is visible.
        if ( heading ) embed.addField( '\u200b', '__**' + heading + '**__', false );
        break;
      }
      case 'image': {
        // Keep a thumbnail that was already changed away from the wiki default.
        if ( embed.thumbnail?.url !== thumbnail ) return;
        // The extension check is case-insensitive so files like "Logo.PNG" are accepted too.
        const image = infobox.data.find( img => {
          return ( /^(?:https?:)?\/\//.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/i.test(img.name) );
        } );
        if ( image ) embed.setThumbnail( image.url );
        break;
      }
    }
  }
  /**
   * Converts wikitext formatting for output: markdown when the text is shown
   * in an embed, otherwise escaped plain text.
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [showEmbed] - If the text is used in an embed.
   * @param {import('./wiki.js')} [wiki] - The wiki (only used for the markdown output).
   * @param {String} [title] - The page title (only used for the markdown output).
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
    return showEmbed
      ? toMarkdown(text, wiki, title, fullWikitext)
      : toPlaintext(text, fullWikitext);
  }
  /**
   * Turns wikitext formatting into markdown.
   *
   * Handles, in order: internal links (`[[target|label]]trail`), section
   * markers (`/* section *\/`, only when a page title is given) and, for full
   * wikitext, external links (`[url label]`) plus bold/italic quote markup.
   *
   * NOTE: `replaceSave` is not defined in this file — presumably a project-wide
   * String.prototype extension; verify before reusing this function elsewhere.
   *
   * @param {String} [text] - The text to modify.
   * @param {import('./wiki.js')} wiki - The wiki.
   * @param {String} [title] - The page title.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
    // Escape parentheses and backslashes first so they cannot break the markdown links built below.
    let output = text.replace( /[()\\]/g, '\\$&' );
    let match = null;

    // Internal links: [[target|label]]trail or [[target]]trail
    const internalLink = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
    while ( ( match = internalLink.exec(output) ) !== null ) {
      const [whole, target, label, trail] = match;
      const pagetitle = target || label;
      let linkTitle = pagetitle;
      let fragment = '';
      if ( pagetitle.startsWith( '#' ) ) {
        // Section of the current page.
        linkTitle = title;
        fragment = pagetitle.substring(1);
      }
      else if ( pagetitle.startsWith( '/' ) ) {
        // Subpage of the current page.
        linkTitle = title + pagetitle;
      }
      const url = wiki.toLink(linkTitle, '', fragment, true);
      output = output.replaceSave( whole, '[' + label + trail + '](' + url + ')' );
    }

    // Section markers: /* section */ optionally followed by more text.
    if ( title !== '' ) {
      const sectionMarker = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
      while ( ( match = sectionMarker.exec(output) ) !== null ) {
        const [whole, section, following] = match;
        const suffix = ( following ? ': ' + following : '' );
        output = output.replaceSave( whole, '[→' + section + '](' + wiki.toLink(title, '', section, true) + ')' + suffix );
      }
    }

    if ( !fullWikitext ) return escapeFormatting(output, true);

    // External links: [https://example.org label]
    const externalLink = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
    while ( ( match = externalLink.exec(output) ) !== null ) {
      const [whole, address, label] = match;
      output = output.replaceSave( whole, '[' + label + '](https://' + address + ')' );
    }
    // Convert HTML tags, then wikitext bold (''') and italic ('').
    return htmlToDiscord( output, true, true ).replaceSave( /'''/g, '**' ).replaceSave( /''/g, '*' );
  }
  /**
   * Removes wikitext formatting.
   *
   * Internal links are reduced to their label and section markers
   * (`/* section *\/`) become `→section:`. Full wikitext additionally has
   * external links reduced to their label and HTML converted to plain text;
   * otherwise the result is only escaped.
   *
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
   * @returns {String}
   */
  function toPlaintext(text = '', fullWikitext = false) {
    const withoutInternalLinks = text.replace( /\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]/g, '$1' );
    const withSectionArrows = withoutInternalLinks.replace( /\/\*\s*([^\*]+?)\s*\*\//g, '→$1:' );
    if ( !fullWikitext ) return escapeFormatting(withSectionArrows);

    const withoutExternalLinks = withSectionArrows.replace( /\[(?:https?:)?\/\/(?:[^ ]+) ([^\]]+)\]/g, '$1' );
    return htmlToPlain( withoutExternalLinks );
  }
  /**
   * Change HTML text to plain text.
   *
   * - Text inside `<sup class="reference">` is dropped.
   * - `<br>` becomes a line break; `<li>` becomes a bulleted line, indented by
   *   its `<ul>` nesting depth using zero-width-space + space pairs.
   * - HTML comments matching `LINK'" <n>:<n>` are replaced by the marker
   *   `*UNKNOWN LINK*` (presumably unresolved link placeholders — confirm).
   * - All remaining text is passed through `escapeFormatting`.
   *
   * @param {String} html - The text in HTML.
   * @returns {String}
   */
  function htmlToPlain(html) {
    const indentUnit = '\u200b ';
    let text = '';
    let reference = false;
    // Nesting depth of <ul> elements; -1 means "not inside a list".
    let listlevel = -1;
    const parser = new htmlparser.Parser( {
      onopentag: (tagname, attribs) => {
        if ( tagname === 'sup' && attribs.class === 'reference' ) reference = true;
        if ( tagname === 'br' ) {
          text += '\n';
          if ( listlevel > -1 ) text += indentUnit.repeat(4 * listlevel + 3);
        }
        if ( tagname === 'ul' ) listlevel++;
        if ( tagname === 'li' ) {
          // An <li> outside of any <ul> (e.g. inside <ol>) leaves listlevel at -1;
          // clamp it so String.prototype.repeat is never called with a negative count.
          text += '\n' + indentUnit.repeat(4 * Math.max(listlevel, 0)) + '• ';
        }
      },
      ontext: (htmltext) => {
        if ( !reference ) {
          if ( listlevel > -1 ) {
            // Keep continuation lines aligned with the text of the list item.
            htmltext = htmltext.replace( /\n/g, '\n' + indentUnit.repeat(4 * listlevel + 3) );
          }
          text += escapeFormatting(htmltext);
        }
      },
      onclosetag: (tagname) => {
        if ( tagname === 'sup' ) reference = false;
        if ( tagname === 'ul' ) listlevel--;
      },
      oncomment: (commenttext) => {
        if ( /^LINK'" \d+:\d+$/.test(commenttext) ) text += '*UNKNOWN LINK*';
      }
    } );
    parser.write( html );
    parser.end();
    return text;
  }
  /**
   * Change HTML text to markdown text.
   *
   * `<b>`, `<i>`, `<s>` and `<u>` are replaced by their markdown markers
   * (the same marker for the opening and the closing tag); every other tag is
   * dropped. Text content is passed through `escapeFormatting`.
   *
   * @param {String} html - The text in HTML.
   * @param {...Boolean} escapeArgs - Extra arguments forwarded to `escapeFormatting`.
   * @returns {String}
   */
  function htmlToDiscord(html, ...escapeArgs) {
    // A Map (not a plain object) so tag names never hit inherited properties.
    const markers = new Map( [
      ['b', '**'],
      ['i', '*'],
      ['s', '~~'],
      ['u', '__']
    ] );
    let markdown = '';
    const appendMarker = (tagname) => {
      markdown += markers.get(tagname) ?? '';
    };
    const parser = new htmlparser.Parser( {
      onopentag: appendMarker,
      ontext: (htmltext) => {
        markdown += escapeFormatting(htmltext, ...escapeArgs);
      },
      onclosetag: appendMarker
    } );
    parser.write( html );
    parser.end();
    return markdown;
  }
  /**
   * Escapes formatting characters by prefixing them with a backslash.
   *
   * Passes run in this order:
   * 1. `(`, `)` and `\` — skipped when the text contains markdown links.
   * 2. `//` — skipped when non-markdown links should be kept.
   * 3. `` ` _ * ~ : < > { } @ | `` — always.
   *
   * @param {String} [text] - The text to modify.
   * @param {Boolean} [isMarkdown] - The text contains markdown links.
   * @param {Boolean} [keepLinks] - Don't escape non-markdown links.
   * @returns {String}
   */
  function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
    const passes = [
      isMarkdown ? null : /[()\\]/g,
      keepLinks ? null : /\/\//g,
      /[`_*~:<>{}@|]/g
    ];
    return passes.reduce(
      (escaped, pattern) => ( pattern ? escaped.replace( pattern, '\\$&' ) : escaped ),
      text
    );
  }
  213. module.exports = {
  214. got,
  215. parse_infobox,
  216. toFormatting,
  217. toMarkdown,
  218. toPlaintext,
  219. htmlToPlain,
  220. htmlToDiscord,
  221. escapeFormatting
  222. };