Vendor the highlight-words-core code directly until [PR#21] is merged

vincent-psarga · vincent-psarga · commit fadeb63a4db7 · 2020-05-27T10:29:09.000+02:00
Also enable highlighting through `dangerouslySetInnerHTML` so that search matches can be highlighted in Markdown-rendered components. By default, the upstream highlighter does not accept HTML input (or at least escapes it); see [PR#21](bvaughn/highlight-words-core#21)
diff --git a/package.json b/package.json
@@ -28,12 +28,10 @@
     "@fortawesome/free-solid-svg-icons": "^5.13.0",
     "@fortawesome/react-fontawesome": "^0.1.9",
     "@types/elasticlunr": "^0.9.0",
-    "@types/react-highlight-words": "^0.16.1",
     "color": "^3.1.2",
     "elasticlunr": "^0.9.5",
     "marked": "^0.8.2",
     "react-accessible-accordion": "^3.0.1",
-    "react-highlight-words": "^0.16.0",
     "sanitize-html": "^1.23.0"
   },
   "peerDependencies": {
diff --git a/src/components/app/HighLight.tsx b/src/components/app/HighLight.tsx
@@ -1,39 +1,71 @@
 import React from 'react'
-import Highlighter from 'react-highlight-words'
+import { findAll, Chunk } from '../highlight-words'
 import SearchQueryContext from '../../SearchQueryContext'
 import elasticlunr from 'elasticlunr'
 
 interface IProps {
+  // Text in which the words of the current search query are highlighted.
   text: string
+  // When true, `text` is handled as an HTML string: matches inside tags are
+  // skipped and the result is injected with dangerouslySetInnerHTML.
+  // The component defaults this to false.
+  htmlText?: boolean
 }
 
-const HighLight: React.FunctionComponent<IProps> = ({ text }) => {
-  const searchQueryContext = React.useContext(SearchQueryContext)
-  const queryWords = searchQueryContext.query
-    ? searchQueryContext.query.split(' ')
-    : []
-  const searchWords: string[] = []
-
-  for (const word of queryWords) {
+// Expands the query words into the list of words to highlight: every word is
+// kept as typed and, when elasticlunr's stemmer yields a different string,
+// the stem is added right after it.
+const allQueryWords = (queryWords: string[]): string[] => {
+  return queryWords.reduce((allWords, word) => {
     const stem = elasticlunr.stemmer(word)
-    searchWords.push(word)
+    allWords.push(word)
 
     if (stem !== word) {
-      searchWords.push(stem)
+      allWords.push(stem)
     }
-  }
+    return allWords
+  }, [])
+}
 
-  elasticlunr.stemmer
+// Rebuilds `text` as an HTML string: each chunk is cut out of `text` by its
+// start/end offsets, and chunks flagged `highlight` are wrapped in <mark>.
+// The chunk text itself is inserted verbatim (no escaping is done here).
+const highlightText = (text: string, chunks: Chunk[]): string => {
+  const pieces = chunks.map(({ start, end, highlight }) => {
+    const fragment = text.slice(start, end)
+    return highlight ? `<mark>${fragment}</mark>` : `${fragment}`
+  })
+
+  return pieces.join('')
+}
 
-  return (
-    <Highlighter
-      className="highlight"
-      highlightClassName="YourHighlightClass"
-      searchWords={searchWords}
-      autoEscape={true}
-      textToHighlight={text}
-    />
+// Turns the chunks into React elements: highlighted chunks become <mark>,
+// the others <span>. The text is passed as a React child, so React escapes it.
+const highlightElements = (text: string, chunks: Chunk[]): JSX.Element[] => {
+  return chunks.map((chunk) => {
+    const chunkText = text.slice(chunk.start, chunk.end)
+
+    // The elements are rendered as an array, so each one needs a key.
+    // Chunks do not overlap, which makes the start offset unique per chunk.
+    if (chunk.highlight) {
+      return <mark key={chunk.start}>{chunkText}</mark>
+    }
+    return <span key={chunk.start}>{chunkText}</span>
+  })
+}
+
+// Renders `text` with the words of the current search query (and their
+// stems) wrapped in <mark>.
+// - htmlText = false: the text is rendered through React elements.
+// - htmlText = true: `text` is treated as HTML and injected as-is with
+//   dangerouslySetInnerHTML, so the caller must pass sanitized HTML.
+const HighLight: React.FunctionComponent<IProps> = ({
+  text,
+  htmlText = false,
+}) => {
+  const searchQueryContext = React.useContext(SearchQueryContext)
+  const queryWords = allQueryWords(
+    searchQueryContext.query ? searchQueryContext.query.split(' ') : []
   )
+  const chunks = findAll({
+    // The query is free text typed by the user: escape it so that characters
+    // such as "(" or "[" are matched literally instead of being compiled as
+    // a (possibly invalid) regular expression, which would throw on render.
+    autoEscape: true,
+    searchWords: queryWords,
+    textToHighlight: text,
+    htmlText,
+  })
+
+  if (htmlText) {
+    return (
+      <span
+        className="highlight"
+        dangerouslySetInnerHTML={{ __html: highlightText(text, chunks) }}
+      />
+    )
+  }
+
+  return <span className="highlight">{highlightElements(text, chunks)}</span>
 }
 
 export default HighLight
diff --git a/src/components/gherkin/Description.tsx b/src/components/gherkin/Description.tsx
@@ -1,6 +1,7 @@
 import React from 'react'
 import marked from 'marked'
 import sanitizeHtml from 'sanitize-html'
+import HighLight from '../app/HighLight'
 
 interface IProps {
   description: string
@@ -9,7 +10,8 @@ interface IProps {
 const Description: React.FunctionComponent<IProps> = ({ description }) => {
   const html = marked(description)
   const sanitizedHtml = sanitizeHtml(html)
-  return <div dangerouslySetInnerHTML={{ __html: sanitizedHtml }} />
+
+  // HighLight is given the sanitized HTML with htmlText enabled, so the
+  // markup is kept as HTML rather than rendered as escaped text.
+  // NOTE(review): HighLight wraps its output in a <span>, where this used to
+  // be a <div> - confirm block-level markup still renders as expected.
+  return <HighLight text={sanitizedHtml} htmlText={true} />
 }
 
 export default Description
diff --git a/src/components/highlight-words.ts b/src/components/highlight-words.ts
@@ -0,0 +1,230 @@
+// Taken from: https://www.npmjs.com/package/highlight-words-core
+// If the PR for handling HTML element is merged, we may use the lib directly
+
+// A slice of the text to highlight.
+// `start` and `end` are character offsets into the text; matches are
+// recorded with `start` = match index and `end` = the regex lastIndex, so
+// `end` is exclusive. `highlight` tells whether the slice is a match
+// (it is only set to true by fillInChunks).
+export type Chunk = {
+  highlight: boolean
+  start: number
+  end: number
+}
+
+// Location of one HTML tag in the text: `start` is the offset of the match
+// and `end` the offset just after it.
+type HTMLTagLocation = {
+  start: number
+  end: number
+}
+
+/**
+ * Creates an array of chunk objects covering the whole text, representing both
+ * the highlightable pieces (those matching a search word) and the
+ * non-highlightable pieces in between.
+ *
+ * The work is done in three steps: find the raw matches (`findChunks`), merge
+ * the ones that overlap (`combineChunks`), then add the chunks for the text
+ * between the matches (`fillInChunks`).
+ * @return Array of "chunks" (where a Chunk is { start:number, end:number, highlight:boolean })
+ */
+export const findAll = ({
+  autoEscape,
+  caseSensitive = false,
+  findChunks = defaultFindChunks,
+  sanitize,
+  searchWords,
+  textToHighlight,
+  htmlText,
+}: {
+  autoEscape?: boolean
+  caseSensitive?: boolean
+  findChunks?: typeof defaultFindChunks
+  sanitize?: typeof defaultSanitize
+  searchWords: Array<string>
+  textToHighlight: string
+  htmlText?: boolean
+}): Array<Chunk> => {
+  const matchedChunks = findChunks({
+    autoEscape,
+    caseSensitive,
+    sanitize,
+    searchWords,
+    textToHighlight,
+    htmlText,
+  })
+  const mergedChunks = combineChunks({ chunks: matchedChunks })
+  // A missing/empty text produces no chunk at all.
+  const totalLength = textToHighlight ? textToHighlight.length : 0
+
+  return fillInChunks({ chunksToHighlight: mergedChunks, totalLength })
+}
+
+/**
+ * Takes an array of {start:number, end:number} objects and combines chunks
+ * that overlap (or touch) into single chunks.
+ *
+ * The input array is left untouched: the chunks are sorted on a copy.
+ * Chunks that are not merged are returned as the same objects; merged chunks
+ * are new objects with `highlight: false`.
+ * @return {start:number, end:number}[] sorted by start, without overlap
+ */
+export const combineChunks = ({
+  chunks,
+}: {
+  chunks: Array<Chunk>
+}): Array<Chunk> => {
+  // Copy before sorting: Array.prototype.sort works in place and would
+  // otherwise reorder the caller's array.
+  const sortedChunks = [...chunks].sort(
+    (first, second) => first.start - second.start
+  )
+  const processedChunks: Chunk[] = []
+
+  for (const nextChunk of sortedChunks) {
+    const prevChunk = processedChunks[processedChunks.length - 1]
+
+    if (prevChunk !== undefined && nextChunk.start <= prevChunk.end) {
+      // It may be the case that prevChunk completely surrounds nextChunk, so
+      // take the largest of the end indices.
+      processedChunks[processedChunks.length - 1] = {
+        highlight: false,
+        start: prevChunk.start,
+        end: Math.max(prevChunk.end, nextChunk.end),
+      }
+    } else {
+      // First chunk, or a chunk starting after the previous one ends.
+      processedChunks.push(nextChunk)
+    }
+  }
+
+  return processedChunks
+}
+
+/**
+ * Examine text for any matches of the search words.
+ * Every non-empty match is added to the returned array as a "chunk" object
+ * ({start:number, end:number}, with highlight set to false). Chunks are
+ * returned in search-word order and may overlap.
+ *
+ * Each search word is compiled as a regular expression (escaped first when
+ * `autoEscape` is set), case-insensitive unless `caseSensitive` is set.
+ * When `htmlText` is set, matches located inside an HTML tag are dropped.
+ * @return {start:number, end:number}[]
+ */
+const defaultFindChunks = ({
+  autoEscape,
+  caseSensitive,
+  sanitize = defaultSanitize,
+  searchWords,
+  textToHighlight,
+  htmlText,
+}: {
+  autoEscape?: boolean
+  caseSensitive?: boolean
+  sanitize?: typeof defaultSanitize
+  searchWords: Array<string>
+  textToHighlight: string
+  htmlText?: boolean
+}): Array<Chunk> => {
+  const haystack = sanitize(textToHighlight)
+  const tagLocations: HTMLTagLocation[] = htmlText
+    ? findHtmlTagLocations(haystack)
+    : []
+  const flags = caseSensitive ? 'g' : 'gi'
+  const found: Chunk[] = []
+
+  for (const rawWord of searchWords) {
+    // Remove empty words
+    if (!rawWord) {
+      continue
+    }
+
+    const cleanWord = sanitize(rawWord)
+    const pattern = autoEscape ? escapeRegExpFn(cleanWord) : cleanWord
+    const regex = new RegExp(pattern, flags)
+
+    for (
+      let match = regex.exec(haystack);
+      match !== null;
+      match = regex.exec(haystack)
+    ) {
+      const start = match.index
+      const end = regex.lastIndex
+
+      // We do not return zero-length matches, nor (for HTML text) matches
+      // located inside a tag.
+      if (end > start && !(htmlText && isInTag(start, end, tagLocations))) {
+        found.push({ highlight: false, start, end })
+      }
+
+      // Prevent browsers like Firefox from getting stuck in an infinite loop
+      // See http://www.regexguru.com/2008/04/watch-out-for-zero-length-matches/
+      if (match.index === regex.lastIndex) {
+        regex.lastIndex++
+      }
+    }
+  }
+
+  return found
+}
+// Allow the findChunks to be overridden in findAll,
+// but for backwards compatibility we export as the old name
+export { defaultFindChunks as findChunks }
+
+/**
+ * Given a set of chunks to highlight, builds the full list of chunks covering
+ * the text from 0 to totalLength: the given chunks (flagged highlight: true)
+ * plus the chunks for the bits of text between them (highlight: false).
+ * Empty chunks (end <= start) are never emitted.
+ * @param chunksToHighlight {start:number, end:number}[]
+ * @param totalLength number
+ * @return {start:number, end:number, highlight:boolean}[]
+ */
+export const fillInChunks = ({
+  chunksToHighlight,
+  totalLength,
+}: {
+  chunksToHighlight: Array<Chunk>
+  totalLength: number
+}): Array<Chunk> => {
+  const filled: Chunk[] = []
+
+  const pushIfNotEmpty = (
+    start: number,
+    end: number,
+    highlight: boolean
+  ): void => {
+    if (end - start > 0) {
+      filled.push({ start, end, highlight })
+    }
+  }
+
+  // Offset just after the last chunk handled so far.
+  let cursor = 0
+  for (const { start, end } of chunksToHighlight) {
+    pushIfNotEmpty(cursor, start, false) // text before the match
+    pushIfNotEmpty(start, end, true) // the match itself
+    cursor = end
+  }
+  // Remaining text; this is the whole text when there is nothing to highlight.
+  pushIfNotEmpty(cursor, totalLength, false)
+
+  return filled
+}
+
+// Default `sanitize` implementation: returns the string unchanged.
+function defaultSanitize(string: string): string {
+  return string
+}
+
+// Prefixes each regular-expression special character listed in the character
+// class below with a backslash ('\\$&' is a backslash followed by the matched
+// character), so the string can be used as a literal pattern.
+function escapeRegExpFn(string: string): string {
+  return string.replace(/[\-\[\]/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&') // eslint-disable-line
+}
+
+/**
+ * Finds the HTML tags (opening, closing and self-closing) present in a text.
+ * @param text the HTML string to scan
+ * @return the location of each tag, in order of appearance; `start` is the
+ *         offset of the opening "<" and `end` the offset just after the ">"
+ */
+function findHtmlTagLocations(text: string): HTMLTagLocation[] {
+  // Adapted from: https://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/
+  // An attribute value is either double-quoted, single-quoted or unquoted.
+  // The unquoted form must be the NEGATED class [^'">\s]+ (the caret must not
+  // be escaped), otherwise tags such as <a href=foo> are not recognized.
+  const tagExp = /<\/?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)\/?>/g
+  const locations: HTMLTagLocation[] = []
+
+  let match: RegExpExecArray | null
+  while ((match = tagExp.exec(text)) !== null) {
+    locations.push({ start: match.index, end: tagExp.lastIndex })
+
+    // Prevent browsers like Firefox from getting stuck in an infinite loop
+    // See http://www.regexguru.com/2008/04/watch-out-for-zero-length-matches/
+    if (match.index === tagExp.lastIndex) {
+      tagExp.lastIndex++
+    }
+  }
+
+  return locations
+}
+
+/**
+ * Tells whether the range [start, end) touches an HTML tag.
+ *
+ * Any overlap counts, not only strict containment: a match that starts on
+ * the "<", ends on the ">" or straddles a tag boundary would otherwise get a
+ * <mark> inserted in the middle of the markup.
+ * @param start offset of the first character of the range
+ * @param end offset just after the last character of the range
+ * @param htmlTagLocation locations of the tags of the text
+ * @return true when the range shares at least one character with a tag
+ */
+function isInTag(
+  start: number,
+  end: number,
+  htmlTagLocation: HTMLTagLocation[]
+): boolean {
+  return htmlTagLocation.some(
+    (location) => start < location.end && end > location.start
+  )
+}
diff --git a/test/HighlightTest.tsx b/test/HighlightTest.tsx
diff --git a/testdata/statuses.feature b/testdata/statuses.feature