|
@@ -7,6 +7,7 @@ const fs = require('fs-extra')
|
|
const yaml = require('js-yaml')
|
|
const yaml = require('js-yaml')
|
|
const striptags = require('striptags')
|
|
const striptags = require('striptags')
|
|
const emojiRegex = require('emoji-regex')
|
|
const emojiRegex = require('emoji-regex')
|
|
|
|
+const he = require('he')
|
|
|
|
|
|
/* global WIKI */
|
|
/* global WIKI */
|
|
|
|
|
|
@@ -17,7 +18,7 @@ const frontmatterRegex = {
|
|
}
|
|
}
|
|
|
|
|
|
const punctuationRegex = /[!,:;/\\_+\-=()&#@<>$~%^*[\]{}"'|]+|(\.\s)|(\s\.)/ig
|
|
const punctuationRegex = /[!,:;/\\_+\-=()&#@<>$~%^*[\]{}"'|]+|(\.\s)|(\s\.)/ig
|
|
-const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig
|
|
|
|
|
|
+// const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig
|
|
|
|
|
|
/**
|
|
/**
|
|
* Pages model
|
|
* Pages model
|
|
@@ -663,9 +664,10 @@ module.exports = class Page extends Model {
|
|
* @returns {string} Cleaned Content Text
|
|
* @returns {string} Cleaned Content Text
|
|
*/
|
|
*/
|
|
static cleanHTML(rawHTML = '') {
|
|
static cleanHTML(rawHTML = '') {
|
|
- return striptags(rawHTML || '')
|
|
|
|
|
|
+ let data = striptags(rawHTML || '')
|
|
.replace(emojiRegex(), '')
|
|
.replace(emojiRegex(), '')
|
|
- .replace(htmlEntitiesRegex, '')
|
|
|
|
|
|
+ // .replace(htmlEntitiesRegex, '')
|
|
|
|
+ return he.decode(data)
|
|
.replace(punctuationRegex, ' ')
|
|
.replace(punctuationRegex, ' ')
|
|
.replace(/(\r\n|\n|\r)/gm, ' ')
|
|
.replace(/(\r\n|\n|\r)/gm, ' ')
|
|
.replace(/\s\s+/g, ' ')
|
|
.replace(/\s\s+/g, ' ')
|