extract_desc.js 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. /**
  2. * Get the description for a page.
  3. * @param {String} [text] - The full page extract.
  4. * @param {String} [fragment] - The section title.
  5. * @returns {String[]}
  6. */
  7. function extract_desc(text = '', fragment = '') {
  8. var sectionIndex = text.indexOf('\ufffd\ufffd');
  9. var extract = ( sectionIndex !== -1 ? text.substring(0, sectionIndex) : text ).trim().escapeFormatting();
  10. if ( extract.length > 1000 ) extract = extract.substring(0, 1000) + '\u2026';
  11. var section = null;
  12. var regex = /\ufffd{2}(\d)\ufffd{2}([^\n]+)/g;
  13. var sectionHeader = '';
  14. var sectionText = '';
  15. while ( fragment && ( section = regex.exec(text) ) !== null ) {
  16. if ( section[2].replace( / /g, '_' ) !== fragment.replace( / /g, '_' ) ) continue;
  17. sectionHeader = section[2].escapeFormatting();
  18. if ( sectionHeader.length > 240 ) sectionHeader = sectionHeader.substring(0, 240) + '\u2026';
  19. sectionHeader = section_formatting(sectionHeader, section[1]);
  20. sectionText = text.substring(regex.lastIndex);
  21. switch ( section[1] ) {
  22. case '6':
  23. sectionIndex = sectionText.indexOf('\ufffd\ufffd6\ufffd\ufffd');
  24. if ( sectionIndex !== -1 ) sectionText = sectionText.substring(0, sectionIndex);
  25. case '5':
  26. sectionIndex = sectionText.indexOf('\ufffd\ufffd5\ufffd\ufffd');
  27. if ( sectionIndex !== -1 ) sectionText = sectionText.substring(0, sectionIndex);
  28. case '4':
  29. sectionIndex = sectionText.indexOf('\ufffd\ufffd4\ufffd\ufffd');
  30. if ( sectionIndex !== -1 ) sectionText = sectionText.substring(0, sectionIndex);
  31. case '3':
  32. sectionIndex = sectionText.indexOf('\ufffd\ufffd3\ufffd\ufffd');
  33. if ( sectionIndex !== -1 ) sectionText = sectionText.substring(0, sectionIndex);
  34. case '2':
  35. sectionIndex = sectionText.indexOf('\ufffd\ufffd2\ufffd\ufffd');
  36. if ( sectionIndex !== -1 ) sectionText = sectionText.substring(0, sectionIndex);
  37. case '1':
  38. sectionIndex = sectionText.indexOf('\ufffd\ufffd1\ufffd\ufffd');
  39. if ( sectionIndex !== -1 ) sectionText = sectionText.substring(0, sectionIndex);
  40. }
  41. sectionText = sectionText.trim().escapeFormatting().replace( /\ufffd{2}(\d)\ufffd{2}([^\n]+)/g, (match, n, sectionTitle) => {
  42. return section_formatting(sectionTitle, n);
  43. } );
  44. if ( sectionText.length > 1000 ) sectionText = sectionText.substring(0, 1000) + '\u2026';
  45. break;
  46. }
  47. return [extract, sectionHeader, sectionText];
  48. }
  49. /**
  50. * Format section title.
  51. * @param {String} title - The section title.
  52. * @param {String} n - The header level.
  53. * @returns {String}
  54. */
  55. function section_formatting(title, n) {
  56. switch ( n ) {
  57. case '1':
  58. title = '***__' + title + '__***';
  59. break;
  60. case '2':
  61. title = '**__' + title + '__**';
  62. break;
  63. case '3':
  64. title = '**' + title + '**';
  65. break;
  66. case '4':
  67. title = '__' + title + '__';
  68. break;
  69. case '5':
  70. title = '*' + title + '*';
  71. break;
  72. }
  73. return title;
  74. }
// Expose extract_desc as the value of this CommonJS module.
module.exports = extract_desc;