markdown.js 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. 'use strict'
  2. const md = require('markdown-it')
  3. const mdEmoji = require('markdown-it-emoji')
  4. const mdTaskLists = require('markdown-it-task-lists')
  5. const mdAbbr = require('markdown-it-abbr')
  6. const mdAnchor = require('markdown-it-anchor')
  7. const mdFootnote = require('markdown-it-footnote')
  8. const mdExternalLinks = require('markdown-it-external-links')
  9. const mdExpandTabs = require('markdown-it-expand-tabs')
  10. const mdAttrs = require('markdown-it-attrs')
  11. const hljs = require('highlight.js')
  12. const cheerio = require('cheerio')
  13. const _ = require('lodash')
  14. const mdRemove = require('remove-markdown')
  // Load plugins

  /**
   * Shared markdown-it renderer used to turn Markdown into HTML.
   *
   * Raw HTML and auto-linking are enabled, fenced code is highlighted through
   * highlight.js, and the plugins below add emoji, task lists, abbreviations,
   * heading anchors, footnotes, link classes, tab expansion and attributes.
   */
  const mkdown = md({
    html: true,
    linkify: true,
    // NOTE(review): markdown-it documents this option as `typographer`; `typography`
    // appears to be ignored. Left unchanged because enabling it alters rendered output — confirm.
    typography: true,
    /**
     * Render a fenced code block.
     *
     * @param {String} str Raw source of the code block
     * @param {String} lang Language tag of the fence (may be empty)
     * @return {String} Complete `<pre>` markup
     */
    highlight (str, lang) {
      // markdown-it skips its own escaping when the returned string starts with
      // `<pre`, so the un-highlighted fallback has to escape the source itself.
      const plainBlock = () => '<pre><code>' + _.escape(str) + '</code></pre>'
      if (!lang || !hljs.getLanguage(lang)) {
        return plainBlock()
      }
      try {
        return '<pre class="hljs"><code>' + hljs.highlight(lang, str, true).value + '</code></pre>'
      } catch (err) {
        // Highlighting is best-effort: fall back to the plain block.
        return plainBlock()
      }
    }
  })
    .use(mdEmoji)
    .use(mdTaskLists)
    .use(mdAbbr)
    .use(mdAnchor, {
      slugify: _.kebabCase,
      permalink: true,
      permalinkClass: 'toc-anchor icon-anchor',
      permalinkSymbol: '',
      permalinkBefore: true
    })
    .use(mdFootnote)
    .use(mdExternalLinks, {
      externalClassName: 'external-link',
      internalClassName: 'internal-link'
    })
    .use(mdExpandTabs, {
      tabWidth: 4
    })
    .use(mdAttrs)
  // Optional MathJax support. `appconfig` is not declared in this file (presumably an
  // application-wide global — confirm); the `typeof` guard keeps the module loadable
  // when it is absent instead of throwing a ReferenceError.
  // NOTE(review): any truthy `appconfig` enables the plugin; a dedicated feature flag
  // may have been intended — confirm.
  if (typeof appconfig !== 'undefined' && appconfig) {
    const mdMathjax = require('markdown-it-mathjax')
    mkdown.use(mdMathjax())
  }
  // Rendering rules

  /**
   * Render an emoji token as an `<i>` icon element whose class is derived from
   * the token markup, with underscores turned into dashes.
   *
   * @param {Array} token Token list being rendered
   * @param {Number} idx Index of the emoji token in the list
   * @return {String} Icon markup
   */
  mkdown.renderer.rules.emoji = (token, idx) => {
    const iconName = _.replace(token[idx].markup, /_/g, '-')
    return `<i class="twa twa-${iconName}"></i>`
  }
  // Video rules

  /**
   * Rules used by parseContent to turn classed links into embedded players.
   *
   * Each rule has:
   *  - selector: CSS selector of the anchors to replace
   *  - regexp:   pattern that extracts the video id from the link href, or
   *              `false` when the href is used as-is
   *  - output:   replacement markup; `{0}` is substituted with the id / href
   */
  const videoRules = [
    {
      selector: 'a.youtube',
      regexp: /(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?v(?:i)?=|&v(?:i)?=))([^#&?]*).*/i,
      output: '<iframe width="640" height="360" src="https://www.youtube.com/embed/{0}?rel=0" frameborder="0" allowfullscreen></iframe>'
    },
    {
      selector: 'a.vimeo',
      // The dot in the host name is escaped so that only `vimeo.com` matches.
      regexp: /vimeo\.com\/(?:channels\/(?:\w+\/)?|groups\/(?:[^/]*)\/videos\/|album\/(?:\d+)\/video\/|)(\d+)(?:$|\/|\?)/i,
      output: '<iframe src="https://player.vimeo.com/video/{0}" width="640" height="360" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
    },
    {
      selector: 'a.dailymotion',
      regexp: /(?:dailymotion\.com(?:\/embed)?(?:\/video|\/hub)|dai\.ly)\/([0-9a-z]+)(?:[-_0-9a-zA-Z]+(?:#video=)?([a-z0-9]+)?)?/i,
      output: '<iframe width="640" height="360" src="//www.dailymotion.com/embed/video/{0}?endscreen-enable=false" frameborder="0" allowfullscreen></iframe>'
    },
    {
      selector: 'a.video',
      regexp: false,
      output: '<video width="640" height="360" controls preload="metadata"><source src="{0}" type="video/mp4"></video>'
    }
  ]
  /**
   * Parse markdown content and build TOC tree
   *
   * Only level 1 and level 2 headings are kept. Level 2 headings become
   * children (`nodes`) of the closest preceding level 1 heading; a level 2
   * heading that appears before any level 1 heading is dropped.
   *
   * @param {String} content Markdown content
   * @return {Array} TOC tree, as a list of `{ content, anchor, nodes }` entries
   */
  const parseTree = (content) => {
    // Strip HTML comments before tokenizing. `[\s\S]` is used instead of an
    // alternation with overlapping branches, which can backtrack exponentially
    // on an unterminated comment.
    const source = content.replace(/<!--[\s\S]*?-->/g, '')
    const tokens = md().parse(source, {})
    const tocArray = []

    // -> Extract headings and their respective levels

    for (let i = 0; i < tokens.length; i++) {
      const headingClose = tokens[i]
      if (headingClose.type !== 'heading_close') {
        continue
      }

      // The token right before a heading_close is expected to be its inline content.
      const heading = tokens[i - 1]
      if (!heading || heading.type !== 'inline') {
        continue
      }

      // -> Exclude levels deeper than 2

      const level = Number(headingClose.tag.slice(1))
      if (level > 2) {
        continue
      }

      const children = heading.children || []
      let title
      let anchor
      if (children.length > 0 && children[0].type === 'link_open') {
        // Heading starts with a link: title and anchor both come from the token after link_open.
        title = mdRemove(children[1].content)
        anchor = _.kebabCase(title)
      } else {
        title = mdRemove(heading.content)
        anchor = _.kebabCase(children.reduce((acc, t) => acc + t.content, ''))
      }

      tocArray.push({ content: title, anchor, level })
    }

    // -> Build tree from flat array

    const tree = []
    for (const entry of tocArray) {
      const node = {
        content: entry.content,
        anchor: entry.anchor,
        nodes: []
      }
      if (entry.level < 2) {
        tree.push(node)
        continue
      }
      // Level 2: attach to the most recent level 1 heading, if there is one.
      const parent = _.last(tree)
      if (parent) {
        parent.nodes.push(node)
      }
    }
    return tree
  }
  /**
   * Parse markdown content to HTML
   *
   * The rendered HTML is post-processed: links inside h1-h3 are flattened to
   * text, classes on the last child of a top-level blockquote are moved to the
   * blockquote, content following h2/h3 headings is wrapped in indent
   * containers, classed video links are replaced by embeds and `align-center`
   * is moved from images to their parent.
   *
   * @param {String} content Markdown content
   * @return {String} HTML formatted content (empty string when nothing was rendered)
   */
  const parseContent = (content) => {
    const cr = cheerio.load(mkdown.render(content))
    const topLevel = cr.root().children()

    if (topLevel.length < 1) {
      return ''
    }

    // -> Check for empty first element

    const firstElm = topLevel.first()[0]
    if (firstElm.type === 'tag' && firstElm.name === 'p') {
      const firstElmChildren = firstElm.children
      if (firstElmChildren.length < 1) {
        // firstElm is a raw DOM node, so removal has to go through the cheerio wrapper.
        cr(firstElm).remove()
      } else if (firstElmChildren.length === 1 && firstElmChildren[0].type === 'tag' && firstElmChildren[0].name === 'img') {
        cr(firstElm).addClass('is-gapless')
      }
    }

    // -> Remove links in headers

    cr('h1 > a:not(.toc-anchor), h2 > a:not(.toc-anchor), h3 > a:not(.toc-anchor)').each((i, elm) => {
      // replaceWith parses its string argument as markup, so the plain text is escaped first.
      const txtLink = cr(elm).text()
      cr(elm).replaceWith(_.escape(txtLink))
    })

    // -> Re-attach blockquote styling classes to their parents

    cr.root().children('blockquote').each((i, elm) => {
      const bqChildren = cr(elm).children()
      if (bqChildren.length < 1) {
        return
      }
      const bqLastChild = bqChildren.last()[0]
      const bqLastChildClasses = cr(bqLastChild).attr('class')
      if (bqLastChildClasses && bqLastChildClasses.length > 0) {
        cr(bqLastChild).removeAttr('class')
        cr(elm).addClass(bqLastChildClasses)
      }
    })

    // -> Enclose content below headers

    // Moves everything between a heading and the next `stopSelector` match into
    // a new <div class="className"> placed right after the heading.
    const encloseBelow = (headingTag, stopSelector, className) => {
      cr(headingTag).each((i, elm) => {
        const heading = cr(elm)
        const following = heading.nextUntil(stopSelector)
        heading.after('<div class="' + className + '"></div>')
        const container = heading.next('.' + className)
        _.forEach(following, (ch) => {
          container.append(ch)
        })
      })
    }
    encloseBelow('h2', 'h1, h2', 'indent-h2')
    encloseBelow('h3', 'h1, h2, h3', 'indent-h3')

    // Replace video links with embeds

    _.forEach(videoRules, (vrule) => {
      cr(vrule.selector).each((i, elm) => {
        let originLink = cr(elm).attr('href')
        if (!_.isString(originLink)) {
          // Anchor without href: nothing to embed, leave it untouched.
          return
        }
        if (vrule.regexp) {
          // Keep the last non-empty captured group; fall back to the full href when nothing matches.
          const vidMatches = _.filter(originLink.match(vrule.regexp), (f) => f && _.isString(f))
          if (vidMatches.length > 0) {
            originLink = _.last(vidMatches)
          }
        }
        // A replacer function keeps `$` sequences in the link literal, and the
        // value is escaped because it is spliced into an HTML attribute.
        const processedLink = vrule.output.replace('{0}', () => _.escape(originLink))
        cr(elm).replaceWith(processedLink)
      })
    })

    // Apply align-center to parent

    cr('img.align-center').each((i, elm) => {
      cr(elm).parent().addClass('align-center')
      cr(elm).removeClass('align-center')
    })

    return cr.html()
  }
  /**
   * Parse meta-data tags from content
   *
   * Meta-data is written as HTML comments of the form `<!-- KEY: value -->`.
   * Keys are lower-cased and values are trimmed; when a key appears more than
   * once the last occurrence wins.
   *
   * @param {String} content Markdown content
   * @return {Object} Properties found in the content and their values
   */
  const parseMeta = (content) => {
    // The value group is lazy so that several meta comments on the same line
    // are read as separate entries instead of being merged into one value.
    const commentMeta = /<!-- ?([a-zA-Z]+):(.*?)-->/g
    const results = {}
    let match
    while ((match = commentMeta.exec(content)) !== null) {
      results[match[1].toLowerCase()] = match[2].trim()
    }
    return results
  }
  /**
   * Strips non-text elements from Markdown content
   *
   * Meta comments, code spans/blocks and URLs are removed, the text is
   * deburred and lower-cased, everything that is not a-z is collapsed to
   * spaces and words of one or two characters are dropped.
   *
   * @param {String} content Markdown-formatted content
   * @return {String} Text-only version
   */
  const removeMarkdown = (content) => {
    const metaComment = /<!-- ?([a-zA-Z]+):(.*)-->/g
    const fencedCode = /```[^`]+```/g
    const inlineCode = /`[^`]+`/g
    const url = new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g')

    // Applies every pattern in order, substituting the same replacement each time.
    const replaceEach = (text, patterns, replacement) => {
      return _.reduce(patterns, (acc, pattern) => _.replace(acc, pattern, replacement), text)
    }

    // 1. Drop meta comments, code and URLs
    let text = replaceEach(content, [metaComment, fencedCode, inlineCode, url], '')
    // 2. Flatten line breaks, then normalize accents and case
    text = _.replace(text, /\r?\n|\r/g, ' ')
    text = _.toLower(_.deburr(text))
    // 3. Keep letters only
    text = replaceEach(text, [/(\b([^a-z]+)\b)/g, /[^a-z]+/g], ' ')
    // 4. Drop very short words and squeeze whitespace
    text = _.replace(text, /(\b(\w{1,2})\b(\W|$))/g, '')
    text = _.replace(text, /\s\s+/g, ' ')

    return mdRemove(text)
  }
  270. module.exports = {
  271. /**
  272. * Parse content and return all data
  273. *
  274. * @param {String} content Markdown-formatted content
  275. * @return {Object} Object containing meta, html and tree data
  276. */
  277. parse (content) {
  278. return {
  279. meta: parseMeta(content),
  280. html: parseContent(content),
  281. tree: parseTree(content)
  282. }
  283. },
  284. parseContent,
  285. parseMeta,
  286. parseTree,
  287. removeMarkdown
  288. }