|
1 | | -import { rest, tokenize } from '../../shared/symbols'; |
2 | 1 | import { highlight } from '../highlight'; |
3 | 2 | import { highlightAll } from '../highlight-all'; |
4 | 3 | import { highlightElement } from '../highlight-element'; |
5 | | -import { LinkedList } from '../linked-list'; |
6 | 4 | import { Registry } from '../registry'; |
| 5 | +import { tokenize } from '../tokenize/tokenize'; |
7 | 6 | import { Hooks } from './hooks'; |
8 | | -import { Token } from './token'; |
9 | 7 | import type { KnownPlugins } from '../../known-plugins'; |
10 | | -import type { Grammar, GrammarToken, GrammarTokens, RegExpLike } from '../../types'; |
| 8 | +import type { Grammar } from '../../types'; |
11 | 9 | import type { HighlightOptions } from '../highlight'; |
12 | 10 | import type { HighlightAllOptions } from '../highlight-all'; |
13 | 11 | import type { HighlightElementOptions } from '../highlight-element'; |
14 | | -import type { LinkedListHeadNode, LinkedListMiddleNode, LinkedListTailNode } from '../linked-list'; |
15 | 12 | import type { TokenStream } from './token'; |
16 | 13 |
|
17 | 14 | /** |
@@ -45,253 +42,11 @@ export default class Prism { |
45 | 42 | } |
46 | 43 |
|
47 | 44 | /** |
48 | | - * This is the heart of Prism, and the most low-level function you can use. It accepts a string of text as input |
49 | | - * and the language definitions to use, and returns an array with the tokenized code. |
50 | | - * |
51 | | - * When the language definition includes nested tokens, the function is called recursively on each of these tokens. |
52 | | - * |
53 | | - * This method could be useful in other contexts as well, as a very crude parser. |
54 | | - * |
55 | | - * @param text A string with the code to be highlighted. |
56 | | - * @param grammar An object containing the tokens to use. |
57 | | - * |
58 | | - * Usually a language definition like `Prism.languages.markup`. |
59 | | - * @returns An array of strings and tokens, a token stream. |
60 | | - * @example |
61 | | - * let code = `var foo = 0;`; |
62 | | - * let tokens = Prism.tokenize(code, Prism.getLanguage('javascript')); |
63 | | - * tokens.forEach(token => { |
64 | | - * if (token instanceof Token && token.type === 'number') { |
65 | | - * console.log(`Found numeric literal: ${token.content}`); |
66 | | - * } |
67 | | - * }); |
| 45 | + * See {@link tokenize} |
68 | 46 | */ |
69 | 47 | tokenize (text: string, grammar: Grammar): TokenStream { |
70 | | - const customTokenize = grammar[tokenize]; |
71 | | - if (customTokenize) { |
72 | | - return customTokenize(text, grammar, this); |
73 | | - } |
74 | | - |
75 | | - let restGrammar = resolve(this.components, grammar[rest]); |
76 | | - while (restGrammar) { |
77 | | - grammar = { ...grammar, ...restGrammar }; |
78 | | - restGrammar = resolve(this.components, restGrammar[rest]); |
79 | | - } |
80 | | - |
81 | | - const tokenList = new LinkedList<string | Token>(); |
82 | | - tokenList.addAfter(tokenList.head, text); |
83 | | - |
84 | | - this._matchGrammar(text, tokenList, grammar, tokenList.head, 0); |
85 | | - |
86 | | - return tokenList.toArray(); |
87 | | - } |
88 | | - |
89 | | - private _matchGrammar ( |
90 | | - text: string, |
91 | | - tokenList: LinkedList<string | Token>, |
92 | | - grammar: GrammarTokens, |
93 | | - startNode: LinkedListHeadNode<string | Token> | LinkedListMiddleNode<string | Token>, |
94 | | - startPos: number, |
95 | | - rematch?: RematchOptions |
96 | | - ): void { |
97 | | - for (const token in grammar) { |
98 | | - const tokenValue = grammar[token]; |
99 | | - if (!grammar.hasOwnProperty(token) || !tokenValue) { |
100 | | - continue; |
101 | | - } |
102 | | - |
103 | | - const patterns = Array.isArray(tokenValue) ? tokenValue : [tokenValue]; |
104 | | - |
105 | | - for (let j = 0; j < patterns.length; ++j) { |
106 | | - if (rematch && rematch.cause === `${token},${j}`) { |
107 | | - return; |
108 | | - } |
109 | | - |
110 | | - const patternObj = toGrammarToken(patterns[j]); |
111 | | - let { pattern, lookbehind = false, greedy = false, alias, inside } = patternObj; |
112 | | - const insideGrammar = resolve(this.components, inside); |
113 | | - |
114 | | - if (greedy && !pattern.global) { |
115 | | - // Without the global flag, lastIndex won't work |
116 | | - patternObj.pattern = pattern = RegExp(pattern.source, pattern.flags + 'g'); |
117 | | - } |
118 | | - |
119 | | - for ( |
120 | | - // iterate the token list and keep track of the current token/string position |
121 | | - let currentNode = startNode.next, pos = startPos; |
122 | | - currentNode.next !== null; |
123 | | - pos += currentNode.value.length, currentNode = currentNode.next |
124 | | - ) { |
125 | | - if (rematch && pos >= rematch.reach) { |
126 | | - break; |
127 | | - } |
128 | | - |
129 | | - let str = currentNode.value; |
130 | | - |
131 | | - if (tokenList.length > text.length) { |
132 | | - // Something went terribly wrong, ABORT, ABORT! |
133 | | - return; |
134 | | - } |
135 | | - |
136 | | - if (str instanceof Token) { |
137 | | - continue; |
138 | | - } |
139 | | - |
140 | | - let removeCount = 1; // this is the to parameter of removeBetween |
141 | | - let match; |
142 | | - |
143 | | - if (greedy) { |
144 | | - match = matchPattern(pattern, pos, text, lookbehind); |
145 | | - if (!match || match.index >= text.length) { |
146 | | - break; |
147 | | - } |
148 | | - |
149 | | - const from = match.index; |
150 | | - const to = match.index + match[0].length; |
151 | | - let p = pos; |
152 | | - |
153 | | - // find the node that contains the match |
154 | | - p += currentNode.value.length; |
155 | | - while (from >= p) { |
156 | | - currentNode = currentNode.next; |
157 | | - if (currentNode.next === null) { |
158 | | - throw new Error( |
159 | | - 'The linked list and the actual text have become de-synced' |
160 | | - ); |
161 | | - } |
162 | | - p += currentNode.value.length; |
163 | | - } |
164 | | - // adjust pos (and p) |
165 | | - p -= currentNode.value.length; |
166 | | - pos = p; |
167 | | - |
168 | | - // the current node is a Token, then the match starts inside another Token, which is invalid |
169 | | - if (currentNode.value instanceof Token) { |
170 | | - continue; |
171 | | - } |
172 | | - |
173 | | - // find the last node which is affected by this match |
174 | | - let k: |
175 | | - | LinkedListMiddleNode<Token | string> |
176 | | - | LinkedListTailNode<Token | string> = currentNode; |
177 | | - for ( |
178 | | - ; |
179 | | - k.next !== null && (p < to || typeof k.value === 'string'); |
180 | | - k = k.next |
181 | | - ) { |
182 | | - removeCount++; |
183 | | - p += k.value.length; |
184 | | - } |
185 | | - removeCount--; |
186 | | - |
187 | | - // replace with the new match |
188 | | - str = text.slice(pos, p); |
189 | | - match.index -= pos; |
190 | | - } |
191 | | - else { |
192 | | - match = matchPattern(pattern, 0, str, lookbehind); |
193 | | - if (!match) { |
194 | | - continue; |
195 | | - } |
196 | | - } |
197 | | - |
198 | | - // eslint-disable-next-line no-redeclare |
199 | | - const from = match.index; |
200 | | - const matchStr = match[0]; |
201 | | - const before = str.slice(0, from); |
202 | | - const after = str.slice(from + matchStr.length); |
203 | | - |
204 | | - const reach = pos + str.length; |
205 | | - if (rematch && reach > rematch.reach) { |
206 | | - rematch.reach = reach; |
207 | | - } |
208 | | - |
209 | | - let removeFrom = currentNode.prev; |
210 | | - |
211 | | - if (before) { |
212 | | - removeFrom = tokenList.addAfter(removeFrom, before); |
213 | | - pos += before.length; |
214 | | - } |
215 | | - |
216 | | - tokenList.removeRange(removeFrom, removeCount); |
217 | | - |
218 | | - const wrapped = new Token( |
219 | | - token, |
220 | | - insideGrammar ? this.tokenize(matchStr, insideGrammar) : matchStr, |
221 | | - alias, |
222 | | - matchStr |
223 | | - ); |
224 | | - currentNode = tokenList.addAfter(removeFrom, wrapped); |
225 | | - |
226 | | - if (after) { |
227 | | - tokenList.addAfter(currentNode, after); |
228 | | - } |
229 | | - |
230 | | - if (removeCount > 1) { |
231 | | - // at least one Token object was removed, so we have to do some rematching |
232 | | - // this can only happen if the current pattern is greedy |
233 | | - |
234 | | - const nestedRematch: RematchOptions = { |
235 | | - cause: `${token},${j}`, |
236 | | - reach, |
237 | | - }; |
238 | | - this._matchGrammar( |
239 | | - text, |
240 | | - tokenList, |
241 | | - grammar, |
242 | | - currentNode.prev, |
243 | | - pos, |
244 | | - nestedRematch |
245 | | - ); |
246 | | - |
247 | | - // the reach might have been extended because of the rematching |
248 | | - if (rematch && nestedRematch.reach > rematch.reach) { |
249 | | - rematch.reach = nestedRematch.reach; |
250 | | - } |
251 | | - } |
252 | | - } |
253 | | - } |
254 | | - } |
255 | | - } |
256 | | -} |
257 | | - |
258 | | -interface RematchOptions { |
259 | | - cause: string; |
260 | | - reach: number; |
261 | | -} |
262 | | - |
263 | | -function matchPattern (pattern: RegExp, pos: number, text: string, lookbehind: boolean) { |
264 | | - pattern.lastIndex = pos; |
265 | | - const match = pattern.exec(text); |
266 | | - if (match && lookbehind && match[1]) { |
267 | | - // change the match to remove the text matched by the Prism lookbehind group |
268 | | - const lookbehindLength = match[1].length; |
269 | | - match.index += lookbehindLength; |
270 | | - match[0] = match[0].slice(lookbehindLength); |
271 | | - } |
272 | | - return match; |
273 | | -} |
274 | | - |
275 | | -function toGrammarToken (pattern: GrammarToken | RegExpLike): GrammarToken { |
276 | | - if (!pattern.pattern) { |
277 | | - return { pattern }; |
278 | | - } |
279 | | - else { |
280 | | - return pattern; |
281 | | - } |
282 | | -} |
283 | | - |
284 | | -function resolve ( |
285 | | - components: Registry, |
286 | | - reference: Grammar | string | null | undefined |
287 | | -): Grammar | undefined { |
288 | | - if (reference) { |
289 | | - if (typeof reference === 'string') { |
290 | | - return components.getLanguage(reference); |
291 | | - } |
292 | | - return reference; |
| 48 | + return tokenize.call(this, text, grammar); |
293 | 49 | } |
294 | | - return undefined; |
295 | 50 | } |
296 | 51 |
|
297 | 52 | export type { Prism }; |
0 commit comments