|
// Taken from: https://www.npmjs.com/package/highlight-words-core
// If the upstream PR that adds handling of HTML elements is merged, we can use the library directly.
| 3 | + |
/**
 * A slice of the text being highlighted, expressed as character offsets.
 * The slice covers `start` up to, but not including, `end`.
 */
export type Chunk = {
  /** `true` when the slice should be rendered as highlighted. */
  highlight: boolean
  /** Offset of the first character of the slice. */
  start: number
  /** Offset just past the last character of the slice. */
  end: number
}
| 9 | + |
/**
 * Position of one HTML tag inside a string, as character offsets.
 * The tag covers `start` up to, but not including, `end`.
 */
type HTMLTagLocation = {
  /** Offset of the tag's opening `<`. */
  start: number
  /** Offset just past the tag's closing `>`. */
  end: number
}
| 14 | + |
/**
 * Splits `textToHighlight` into consecutive chunks, each flagged as
 * highlighted (it matches a search word) or not (the text in between).
 *
 * The work is done in three steps: find the raw matches, merge the ones that
 * overlap, then fill the gaps between them with non-highlighted chunks.
 *
 * @param autoEscape forwarded to `findChunks`; the default finder escapes
 *   regular-expression special characters in each search word when set
 * @param caseSensitive forwarded to `findChunks`; defaults to `false`
 * @param findChunks match finder to use; defaults to `defaultFindChunks`
 * @param sanitize forwarded to `findChunks`; the default finder applies it to
 *   the text and to each search word before matching
 * @param searchWords the words (or patterns) to look for
 * @param textToHighlight the text to search
 * @param htmlText forwarded to `findChunks`; the default finder skips matches
 *   located inside HTML tags when set
 * @returns array of "chunks" (where a Chunk is { start:number, end:number, highlight:boolean })
 */
export const findAll = ({
  autoEscape,
  caseSensitive = false,
  findChunks = defaultFindChunks,
  sanitize,
  searchWords,
  textToHighlight,
  htmlText,
}: {
  autoEscape?: boolean
  caseSensitive?: boolean
  findChunks?: typeof defaultFindChunks
  sanitize?: typeof defaultSanitize
  searchWords: Array<string>
  textToHighlight: string
  htmlText?: boolean
}): Array<Chunk> => {
  // Step 1: raw (possibly overlapping) matches.
  const rawMatches = findChunks({
    autoEscape,
    caseSensitive,
    sanitize,
    searchWords,
    textToHighlight,
    htmlText,
  })

  // Step 2: collapse overlapping matches into single ranges.
  const mergedMatches = combineChunks({ chunks: rawMatches })

  // Step 3: add the non-highlighted chunks around the matches.
  const totalLength = textToHighlight ? textToHighlight.length : 0
  return fillInChunks({ chunksToHighlight: mergedMatches, totalLength })
}
| 49 | + |
/**
 * Takes an array of {start:number, end:number} chunks and merges the ones
 * that overlap or touch into single chunks.
 *
 * The input array is not modified: sorting happens on a copy.
 *
 * @param chunks chunks in any order
 * @returns the merged chunks, sorted by ascending `start`. A chunk produced by
 *   a merge is a new object with `highlight: false`; chunks that did not need
 *   merging are returned as the same objects that were passed in.
 */
export const combineChunks = ({
  chunks,
}: {
  chunks: Array<Chunk>
}): Array<Chunk> => {
  // Array.prototype.sort works in place, so sort a copy to leave the caller's
  // array untouched.
  const sortedChunks = [...chunks].sort(
    (first, second) => first.start - second.start
  )

  const combined: Array<Chunk> = []
  for (const nextChunk of sortedChunks) {
    const prevChunk = combined[combined.length - 1]

    if (prevChunk !== undefined && nextChunk.start <= prevChunk.end) {
      // prevChunk may completely surround nextChunk, so keep the larger of
      // the two end indices.
      combined[combined.length - 1] = {
        highlight: false,
        start: prevChunk.start,
        end: Math.max(prevChunk.end, nextChunk.end),
      }
    } else {
      // First chunk, or no overlap with the previous one.
      combined.push(nextChunk)
    }
  }

  return combined
}
| 86 | + |
/**
 * Searches the text for every occurrence of every search word and reports
 * each one as a chunk ({start:number, end:number}, with `highlight: false`).
 *
 * Each search word is compiled with `new RegExp`, so unless `autoEscape` is
 * set it is interpreted as a regular-expression pattern. Chunks are returned
 * in discovery order (word by word) and may overlap.
 *
 * @param autoEscape escape regular-expression special characters in each word
 * @param caseSensitive match case-sensitively when truthy
 * @param sanitize applied to the text and to each word before matching;
 *   defaults to `defaultSanitize`
 * @param searchWords words to look for; empty entries are skipped
 * @param textToHighlight the text to search
 * @param htmlText when truthy, matches located inside an HTML tag are dropped
 * @returns {start:number, end:number}[]
 */
const defaultFindChunks = ({
  autoEscape,
  caseSensitive,
  sanitize = defaultSanitize,
  searchWords,
  textToHighlight,
  htmlText,
}: {
  autoEscape?: boolean
  caseSensitive?: boolean
  sanitize?: typeof defaultSanitize
  searchWords: Array<string>
  textToHighlight: string
  htmlText?: boolean
}): Array<Chunk> => {
  const haystack = sanitize(textToHighlight)
  const tagLocations = htmlText ? findHtmlTagLocations(haystack) : []
  const found: Array<Chunk> = []

  for (const rawWord of searchWords) {
    // Skip empty words
    if (!rawWord) {
      continue
    }

    const cleanedWord = sanitize(rawWord)
    const source = autoEscape ? escapeRegExpFn(cleanedWord) : cleanedWord
    const pattern = new RegExp(source, caseSensitive ? 'g' : 'gi')

    let match: RegExpExecArray | null
    while ((match = pattern.exec(haystack)) !== null) {
      const start = match.index
      const end = pattern.lastIndex

      // Zero-length matches are never reported; with htmlText set, neither
      // are matches that sit inside a tag.
      const insideTag = htmlText ? isInTag(start, end, tagLocations) : false
      if (end > start && !insideTag) {
        found.push({ highlight: false, start, end })
      }

      // Prevent browsers like Firefox from getting stuck in an infinite loop
      // See http://www.regexguru.com/2008/04/watch-out-for-zero-length-matches/
      if (match.index === pattern.lastIndex) {
        pattern.lastIndex++
      }
    }
  }

  return found
}
// Allow the findChunks to be overridden in findAll,
// but for backwards compatibility we export as the old name
export { defaultFindChunks as findChunks }
| 149 | + |
/**
 * Turns a list of ranges to highlight into a full list of chunks by inserting
 * non-highlighted chunks for the text before, between and after those ranges.
 *
 * Every input range is emitted with `highlight: true`, whatever its own
 * `highlight` value is. Empty ranges and empty gaps are left out.
 *
 * @param chunksToHighlight {start:number, end:number}[]
 * @param totalLength number
 * @returns {start:number, end:number, highlight:boolean}[]
 */
export const fillInChunks = ({
  chunksToHighlight,
  totalLength,
}: {
  chunksToHighlight: Array<Chunk>
  totalLength: number
}): Array<Chunk> => {
  const result: Chunk[] = []

  // Records a slice unless it is empty.
  const pushIfNonEmpty = (
    start: number,
    end: number,
    highlight: boolean
  ): void => {
    if (end - start > 0) {
      result.push({ start, end, highlight })
    }
  }

  // Offset of the first character not yet covered by a chunk.
  let cursor = 0
  for (const { start, end } of chunksToHighlight) {
    pushIfNonEmpty(cursor, start, false)
    pushIfNonEmpty(start, end, true)
    cursor = end
  }

  // Whatever remains after the last range (or the whole text when there are
  // no ranges at all) is not highlighted.
  pushIfNonEmpty(cursor, totalLength, false)

  return result
}
| 188 | + |
/**
 * Default sanitizer: a no-op that hands the input back unchanged.
 *
 * @param string the text or search word to sanitize
 * @returns the same string
 */
function defaultSanitize(string: string): string {
  const unchanged = string
  return unchanged
}
| 192 | + |
/**
 * Prefixes every regular-expression special character in the input with a
 * backslash so the result can be compiled as a pattern that matches the
 * input literally.
 *
 * @param string the text to escape
 * @returns the escaped text
 */
function escapeRegExpFn(string: string): string {
  // eslint-disable-next-line
  const specialCharacters = /[\-\[\]/\{\}\(\)\*\+\?\.\\\^\$\|]/g
  // '$&' stands for the matched character itself.
  return string.replace(specialCharacters, '\\$&')
}
| 196 | + |
/**
 * Locates every HTML tag (opening, closing or self-closing) in the text.
 *
 * Attribute values may be double-quoted, single-quoted or unquoted.
 *
 * @param text the text to scan
 * @returns the position of each tag, in order of appearance; `start` is the
 *   offset of the tag's `<` and `end` the offset just past its `>`
 */
function findHtmlTagLocations(text: string): HTMLTagLocation[] {
  // Stolen from here: https://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/
  // The last alternative of the attribute value must be the NEGATED class
  // [^'">\s]+ (an unquoted value); with the caret escaped, tags such as
  // <a href=foo> are not recognised.
  const tagExp =
    /<\/?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)\/?>/g
  const locations: HTMLTagLocation[] = []

  // The pattern always consumes at least "<x>", so lastIndex advances on
  // every match and no zero-length-match guard is needed.
  let match: RegExpExecArray | null
  while ((match = tagExp.exec(text)) !== null) {
    locations.push({ start: match.index, end: tagExp.lastIndex })
  }

  return locations
}
| 218 | + |
/**
 * Tells whether the range `start`..`end` (end exclusive) lies inside one of
 * the given HTML tags.
 *
 * The bounds are inclusive: a range that begins on the tag's `<`, finishes on
 * its `>`, or covers the whole tag counts as inside. A range that only partly
 * overlaps a tag does not.
 *
 * @param start offset of the first character of the range
 * @param end offset just past the last character of the range
 * @param htmlTagLocation locations of the tags to test against
 * @returns `true` when the range is contained in at least one tag
 */
function isInTag(
  start: number,
  end: number,
  htmlTagLocation: HTMLTagLocation[]
): boolean {
  return htmlTagLocation.some(
    (location) => start >= location.start && end <= location.end
  )
}
0 commit comments