// functions.js (7.7 KB)
  1. const htmlparser = require('htmlparser2');
  2. const got = require('got').extend( {
  3. throwHttpErrors: false,
  4. timeout: 5000,
  5. headers: {
  6. 'User-Agent': 'Wiki-Bot/' + ( isDebug ? 'testing' : process.env.npm_package_version ) + ' (Discord; ' + process.env.npm_package_name + ')'
  7. },
  8. responseType: 'json'
  9. } );
  10. /**
  11. * Parse infobox content
  12. * @param {Object} infobox - The content of the infobox.
  13. * @param {import('discord.js').MessageEmbed} embed - The message embed.
  14. * @param {String} [thumbnail] - The default thumbnail for the wiki.
  15. * @returns {import('discord.js').MessageEmbed?}
  16. */
  17. function parse_infobox(infobox, embed, thumbnail) {
  18. if ( !infobox || embed.fields.length >= 25 || embed.length > 5500 ) return;
  19. if ( infobox.parser_tag_version === 2 ) {
  20. infobox.data.forEach( group => {
  21. parse_infobox(group, embed, thumbnail);
  22. } );
  23. embed.fields = embed.fields.filter( (field, i, fields) => {
  24. if ( field.name !== '\u200b' ) return true;
  25. return ( fields[i + 1]?.name && fields[i + 1].name !== '\u200b' );
  26. } );
  27. return embed;
  28. }
  29. switch ( infobox.type ) {
  30. case 'data':
  31. var {label = '', value = '', source = ''} = infobox.data;
  32. label = htmlToPlain(label).trim();
  33. value = htmlToPlain(value).trim();
  34. if ( label.includes( '*UNKNOWN LINK*' ) ) {
  35. label = '`' + source + '`';
  36. embed.brokenInfobox = true;
  37. }
  38. if ( value.includes( '*UNKNOWN LINK*' ) ) {
  39. value = '`' + source + '`';
  40. embed.brokenInfobox = true;
  41. }
  42. if ( label.length > 50 ) label = label.substring(0, 50) + '\u2026';
  43. if ( value.length > 250 ) value = value.substring(0, 250) + '\u2026';
  44. if ( label && value ) embed.addField( label, value, true );
  45. break;
  46. case 'group':
  47. infobox.data.value.forEach( group => {
  48. parse_infobox(group, embed, thumbnail);
  49. } );
  50. break;
  51. case 'header':
  52. var {value = ''} = infobox.data;
  53. value = htmlToPlain(value).trim();
  54. if ( value.length > 100 ) value = value.substring(0, 100) + '\u2026';
  55. if ( value ) embed.addField( '\u200b', '__**' + value + '**__', false );
  56. break;
  57. case 'image':
  58. if ( embed.thumbnail?.url !== thumbnail ) return;
  59. var image = infobox.data.find( img => {
  60. return ( /^(?:https?:)?\/\//.test(img.url) && /\.(?:png|jpg|jpeg|gif)$/.test(img.name) );
  61. } );
  62. if ( image ) embed.setThumbnail( image.url.replace( /^(?:https?:)?\/\//, 'https://' ) );
  63. break;
  64. }
  65. }
  66. /**
  67. * Make wikitext formatting usage.
  68. * @param {String} [text] - The text to modify.
  69. * @param {Boolean} [showEmbed] - If the text is used in an embed.
  70. * @param {import('./wiki.js')} [wiki] - The wiki.
  71. * @param {String} [title] - The page title.
  72. * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  73. * @returns {String}
  74. */
  75. function toFormatting(text = '', showEmbed = false, wiki, title = '', fullWikitext = false) {
  76. if ( showEmbed ) return toMarkdown(text, wiki, title, fullWikitext);
  77. else return toPlaintext(text, fullWikitext);
  78. };
  79. /**
  80. * Turns wikitext formatting into markdown.
  81. * @param {String} [text] - The text to modify.
  82. * @param {import('./wiki.js')} wiki - The wiki.
  83. * @param {String} [title] - The page title.
  84. * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  85. * @returns {String}
  86. */
  87. function toMarkdown(text = '', wiki, title = '', fullWikitext = false) {
  88. text = text.replace( /[()\\]/g, '\\$&' );
  89. var link = null;
  90. var regex = /\[\[(?:([^\|\]]+)\|)?([^\]]+)\]\]([a-z]*)/g;
  91. while ( ( link = regex.exec(text) ) !== null ) {
  92. var pagetitle = ( link[1] || link[2] );
  93. var page = wiki.toLink(( /^[#\/]/.test(pagetitle) ? title + ( pagetitle.startsWith( '/' ) ? pagetitle : '' ) : pagetitle ), '', ( pagetitle.startsWith( '#' ) ? pagetitle.substring(1) : '' ), true);
  94. text = text.replaceSave( link[0], '[' + link[2] + link[3] + '](' + page + ')' );
  95. }
  96. if ( title !== '' ) {
  97. regex = /\/\*\s*([^\*]+?)\s*\*\/\s*(.)?/g;
  98. while ( ( link = regex.exec(text) ) !== null ) {
  99. text = text.replaceSave( link[0], '[→' + link[1] + '](' + wiki.toLink(title, '', link[1], true) + ')' + ( link[2] ? ': ' + link[2] : '' ) );
  100. }
  101. }
  102. if ( fullWikitext ) {
  103. regex = /\[(?:https?:)?\/\/([^ ]+) ([^\]]+)\]/g;
  104. while ( ( link = regex.exec(text) ) !== null ) {
  105. text = text.replaceSave( link[0], '[' + link[2] + '](https://' + link[1] + ')' );
  106. }
  107. return htmlToDiscord( text, true, true ).replaceSave( /'''/g, '**' ).replaceSave( /''/g, '*' );
  108. }
  109. return escapeFormatting(text, true);
  110. };
  111. /**
  112. * Removes wikitext formatting.
  113. * @param {String} [text] - The text to modify.
  114. * @param {Boolean} [fullWikitext] - If the text can contain full wikitext.
  115. * @returns {String}
  116. */
  117. function toPlaintext(text = '', fullWikitext = false) {
  118. text = text.replace( /\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]/g, '$1' ).replace( /\/\*\s*([^\*]+?)\s*\*\//g, '→$1:' );
  119. if ( fullWikitext ) {
  120. return htmlToPlain( text.replace( /\[(?:https?:)?\/\/(?:[^ ]+) ([^\]]+)\]/g, '$1' ) );
  121. }
  122. else return escapeFormatting(text);
  123. };
  124. /**
  125. * Change HTML text to plain text.
  126. * @param {String} html - The text in HTML.
  127. * @returns {String}
  128. */
  129. function htmlToPlain(html) {
  130. var text = '';
  131. var reference = false;
  132. var listlevel = -1;
  133. var parser = new htmlparser.Parser( {
  134. onopentag: (tagname, attribs) => {
  135. if ( tagname === 'sup' && attribs.class === 'reference' ) reference = true;
  136. if ( tagname === 'br' ) {
  137. text += '\n';
  138. if ( listlevel > -1 ) text += '\u200b '.repeat(4 * listlevel + 3);
  139. }
  140. if ( tagname === 'hr' ) {
  141. if ( !text.endsWith( '\n' ) ) text += '\n';
  142. text += '─'.repeat(10) + '\n';
  143. }
  144. if ( tagname === 'p' && !text.endsWith( '\n' ) ) text += '\n';
  145. if ( tagname === 'ul' ) listlevel++;
  146. if ( tagname === 'li' ) {
  147. if ( !text.endsWith( '\n' ) ) text += '\n';
  148. if ( listlevel > -1 ) text += '\u200b '.repeat(4 * listlevel);
  149. text += '• ';
  150. }
  151. },
  152. ontext: (htmltext) => {
  153. if ( !reference ) {
  154. if ( listlevel > -1 ) {
  155. htmltext = htmltext.replace( /\n/g, '\n' + '\u200b '.repeat(4 * listlevel + 3) );
  156. }
  157. text += escapeFormatting(htmltext);
  158. }
  159. },
  160. onclosetag: (tagname) => {
  161. if ( tagname === 'sup' ) reference = false;
  162. if ( tagname === 'ul' ) listlevel--;
  163. },
  164. oncomment: (commenttext) => {
  165. if ( /^LINK'" \d+:\d+$/.test(commenttext) ) text += '*UNKNOWN LINK*';
  166. }
  167. } );
  168. parser.write( html );
  169. parser.end();
  170. return text;
  171. };
  172. /**
  173. * Change HTML text to markdown text.
  174. * @param {String} html - The text in HTML.
  175. * @param {Boolean[]} [escapeArgs] - Arguments for the escaping of text formatting.
  176. * @returns {String}
  177. */
  178. function htmlToDiscord(html, ...escapeArgs) {
  179. var text = '';
  180. var parser = new htmlparser.Parser( {
  181. onopentag: (tagname, attribs) => {
  182. switch (tagname) {
  183. case 'b':
  184. text += '**';
  185. break;
  186. case 'i':
  187. text += '*';
  188. break;
  189. case 's':
  190. text += '~~';
  191. break;
  192. case 'u':
  193. text += '__';
  194. break;
  195. }
  196. },
  197. ontext: (htmltext) => {
  198. text += escapeFormatting(htmltext, ...escapeArgs);
  199. },
  200. onclosetag: (tagname) => {
  201. switch (tagname) {
  202. case 'b':
  203. text += '**';
  204. break;
  205. case 'i':
  206. text += '*';
  207. break;
  208. case 's':
  209. text += '~~';
  210. break;
  211. case 'u':
  212. text += '__';
  213. break;
  214. }
  215. }
  216. } );
  217. parser.write( html );
  218. parser.end();
  219. return text;
  220. };
  221. /**
  222. * Escapes formatting.
  223. * @param {String} [text] - The text to modify.
  224. * @param {Boolean} [isMarkdown] - The text contains markdown links.
  225. * @param {Boolean} [keepLinks] - Don't escape non-markdown links.
  226. * @returns {String}
  227. */
  228. function escapeFormatting(text = '', isMarkdown = false, keepLinks = false) {
  229. if ( !isMarkdown ) text = text.replace( /[()\\]/g, '\\$&' );
  230. if ( !keepLinks ) text = text.replace( /\/\//g, '\\$&' );
  231. return text.replace( /[`_*~:<>{}@|]/g, '\\$&' );
  232. };
  233. module.exports = {
  234. got,
  235. parse_infobox,
  236. toFormatting,
  237. toMarkdown,
  238. toPlaintext,
  239. htmlToPlain,
  240. htmlToDiscord,
  241. escapeFormatting
  242. };