engine.js 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. const tsquery = require('pg-tsquery')()
  2. const stream = require('stream')
  3. const Promise = require('bluebird')
  4. const pipeline = Promise.promisify(stream.pipeline)
  5. /* global WIKI */
  6. module.exports = {
  7. async activate() {
  8. if (WIKI.config.db.type !== 'postgres') {
  9. throw new WIKI.Error.SearchActivationFailed('Must use PostgreSQL database to activate this engine!')
  10. }
  11. },
  12. async deactivate() {
  13. WIKI.logger.info(`(SEARCH/POSTGRES) Dropping index tables...`)
  14. await WIKI.models.knex.schema.dropTable('pagesWords')
  15. await WIKI.models.knex.schema.dropTable('pagesVector')
  16. WIKI.logger.info(`(SEARCH/POSTGRES) Index tables have been dropped.`)
  17. },
  18. /**
  19. * INIT
  20. */
  21. async init() {
  22. WIKI.logger.info(`(SEARCH/POSTGRES) Initializing...`)
  23. // -> Ensure pg_trgm extension is available (required for similarity search)
  24. await WIKI.models.knex.raw('CREATE EXTENSION IF NOT EXISTS pg_trgm')
  25. // -> Create Search Index
  26. const indexExists = await WIKI.models.knex.schema.hasTable('pagesVector')
  27. if (!indexExists) {
  28. WIKI.logger.info(`(SEARCH/POSTGRES) Creating Pages Vector table...`)
  29. await WIKI.models.knex.schema.createTable('pagesVector', table => {
  30. table.increments()
  31. table.string('path')
  32. table.string('locale')
  33. table.string('title')
  34. table.string('description')
  35. table.specificType('tokens', 'TSVECTOR')
  36. table.text('content')
  37. })
  38. }
  39. // -> Create Words Index
  40. const wordsExists = await WIKI.models.knex.schema.hasTable('pagesWords')
  41. if (!wordsExists) {
  42. WIKI.logger.info(`(SEARCH/POSTGRES) Creating Words Suggestion Index...`)
  43. await WIKI.models.knex.raw(`
  44. CREATE TABLE "pagesWords" AS SELECT word FROM ts_stat(
  45. 'SELECT to_tsvector(''simple'', "title") || to_tsvector(''simple'', "description") || to_tsvector(''simple'', "content") FROM "pagesVector"'
  46. )`)
  47. await WIKI.models.knex.raw(`CREATE INDEX "pageWords_idx" ON "pagesWords" USING GIN (word gin_trgm_ops)`)
  48. }
  49. WIKI.logger.info(`(SEARCH/POSTGRES) Initialization completed.`)
  50. },
  51. /**
  52. * QUERY
  53. *
  54. * @param {String} q Query
  55. * @param {Object} opts Additional options
  56. */
  57. async query(q, opts) {
  58. try {
  59. let suggestions = []
  60. let qry = `
  61. SELECT id, path, locale, title, description
  62. FROM "pagesVector", to_tsquery(?,?) query
  63. WHERE (query @@ "tokens" OR path ILIKE ?)
  64. `
  65. let qryEnd = `ORDER BY ts_rank(tokens, query) DESC`
  66. let qryParams = [this.config.dictLanguage, tsquery(q), `%${q.toLowerCase()}%`]
  67. if (opts.locale) {
  68. qry = `${qry} AND locale = ?`
  69. qryParams.push(opts.locale)
  70. }
  71. if (opts.path) {
  72. qry = `${qry} AND path ILIKE ?`
  73. qryParams.push(`%${opts.path}`)
  74. }
  75. const results = await WIKI.models.knex.raw(`
  76. ${qry}
  77. ${qryEnd}
  78. `, qryParams)
  79. if (results.rows.length < 5) {
  80. try {
  81. const suggestResults = await WIKI.models.knex.raw(`SELECT word, word <-> ? AS rank FROM "pagesWords" WHERE similarity(word, ?) > 0.2 ORDER BY rank LIMIT 5;`, [q, q])
  82. suggestions = suggestResults.rows.map(r => r.word)
  83. } catch (err) {
  84. WIKI.logger.warn(`Search Engine Suggestion Error (pg_trgm extension may be missing): ${err.message}`)
  85. }
  86. }
  87. return {
  88. results: results.rows,
  89. suggestions,
  90. totalHits: results.rows.length
  91. }
  92. } catch (err) {
  93. WIKI.logger.warn('Search Engine Error:')
  94. WIKI.logger.warn(err)
  95. }
  96. },
  97. /**
  98. * CREATE
  99. *
  100. * @param {Object} page Page to create
  101. */
  102. async created(page) {
  103. await WIKI.models.knex.raw(`
  104. INSERT INTO "pagesVector" (path, locale, title, description, "tokens") VALUES (
  105. ?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C'))
  106. )
  107. `, [page.path, page.localeCode, page.title, page.description, page.title, page.description, page.safeContent])
  108. },
  109. /**
  110. * UPDATE
  111. *
  112. * @param {Object} page Page to update
  113. */
  114. async updated(page) {
  115. await WIKI.models.knex.raw(`
  116. UPDATE "pagesVector" SET
  117. title = ?,
  118. description = ?,
  119. tokens = (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') ||
  120. setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') ||
  121. setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C'))
  122. WHERE path = ? AND locale = ?
  123. `, [page.title, page.description, page.title, page.description, page.safeContent, page.path, page.localeCode])
  124. },
  125. /**
  126. * DELETE
  127. *
  128. * @param {Object} page Page to delete
  129. */
  130. async deleted(page) {
  131. await WIKI.models.knex('pagesVector').where({
  132. locale: page.localeCode,
  133. path: page.path
  134. }).del().limit(1)
  135. },
  136. /**
  137. * RENAME
  138. *
  139. * @param {Object} page Page to rename
  140. */
  141. async renamed(page) {
  142. await WIKI.models.knex('pagesVector').where({
  143. locale: page.localeCode,
  144. path: page.path
  145. }).update({
  146. locale: page.destinationLocaleCode,
  147. path: page.destinationPath
  148. })
  149. },
  150. /**
  151. * REBUILD INDEX
  152. */
  153. async rebuild() {
  154. WIKI.logger.info(`(SEARCH/POSTGRES) Rebuilding Index...`)
  155. await WIKI.models.knex('pagesVector').truncate()
  156. await WIKI.models.knex('pagesWords').truncate()
  157. await pipeline(
  158. WIKI.models.knex.column('path', 'localeCode', 'title', 'description', 'render').select().from('pages').where({
  159. isPublished: true,
  160. isPrivate: false
  161. }).stream(),
  162. new stream.Transform({
  163. objectMode: true,
  164. transform: async (page, enc, cb) => {
  165. const content = WIKI.models.pages.cleanHTML(page.render)
  166. await WIKI.models.knex.raw(`
  167. INSERT INTO "pagesVector" (path, locale, title, description, "tokens", content) VALUES (
  168. ?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')), ?
  169. )
  170. `, [page.path, page.localeCode, page.title, page.description, page.title, page.description, content, content])
  171. cb()
  172. }
  173. })
  174. )
  175. await WIKI.models.knex.raw(`
  176. INSERT INTO "pagesWords" (word)
  177. SELECT word FROM ts_stat(
  178. 'SELECT to_tsvector(''simple'', "title") || to_tsvector(''simple'', "description") || to_tsvector(''simple'', "content") FROM "pagesVector"'
  179. )
  180. `)
  181. WIKI.logger.info(`(SEARCH/POSTGRES) Index rebuilt successfully.`)
  182. }
  183. }