2
0
Эх сурвалжийг харах

fix: restore unicode chars for search content

Nick 5 жил өмнө
parent
commit
744e6e3248
3 өөрчлөгдсөн, 7 нэмэгдсэн, 4 устгасан
  1. 1 0
      package.json
  2. 5 3
      server/models/pages.js
  3. 1 1
      yarn.lock

+ 1 - 0
package.json

@@ -76,6 +76,7 @@
     "graphql-rate-limit-directive": "1.1.0",
     "graphql-subscriptions": "1.1.0",
     "graphql-tools": "4.0.5",
+    "he": "1.2.0",
     "highlight.js": "9.15.10",
     "i18next": "17.0.15",
     "i18next-express-middleware": "1.8.2",

+ 5 - 3
server/models/pages.js

@@ -7,6 +7,7 @@ const fs = require('fs-extra')
 const yaml = require('js-yaml')
 const striptags = require('striptags')
 const emojiRegex = require('emoji-regex')
+const he = require('he')
 
 /* global WIKI */
 
@@ -17,7 +18,7 @@ const frontmatterRegex = {
 }
 
 const punctuationRegex = /[!,:;/\\_+\-=()&#@<>$~%^*[\]{}"'|]+|(\.\s)|(\s\.)/ig
-const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig
+// const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig
 
 /**
  * Pages model
@@ -663,9 +664,10 @@ module.exports = class Page extends Model {
    * @returns {string} Cleaned Content Text
    */
   static cleanHTML(rawHTML = '') {
-    return striptags(rawHTML || '')
+    let data = striptags(rawHTML || '')
       .replace(emojiRegex(), '')
-      .replace(htmlEntitiesRegex, '')
+      // .replace(htmlEntitiesRegex, '')
+    return he.decode(data)
       .replace(punctuationRegex, ' ')
       .replace(/(\r\n|\n|\r)/gm, ' ')
       .replace(/\s\s+/g, ' ')

+ 1 - 1
yarn.lock

@@ -6215,7 +6215,7 @@ hash.js@^1.0.0, hash.js@^1.0.3, hash.js@^1.1.3:
     inherits "^2.0.3"
     minimalistic-assert "^1.0.1"
 
-he@^1.1.0, he@^1.2.0:
+he@1.2.0, he@^1.1.0, he@^1.2.0:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"
   integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==