|
| 1 | +// This is a shared buffer that is used to keep track of the current nesting level |
| 2 | +// of parens, brackets, and braces. It is used to determine if a character is at |
| 3 | +// the top-level of a string. This is a performance optimization to avoid memory |
| 4 | +// allocations on every call to `segment`. |
| 5 | +const closingBracketStack = new Uint8Array(256) |
| 6 | + |
| 7 | +// All numbers are equivalent to the value returned by `String#charCodeAt(0)` |
| 8 | +const BACKSLASH = 0x5c |
| 9 | +const OPEN_PAREN = 0x28 |
| 10 | +const OPEN_BRACKET = 0x5b |
| 11 | +const OPEN_CURLY = 0x7b |
| 12 | +const CLOSE_PAREN = 0x29 |
| 13 | +const CLOSE_BRACKET = 0x5d |
| 14 | +const CLOSE_CURLY = 0x7d |
| 15 | + |
1 | 16 | /**
|
2 | 17 | * This splits a string on a top-level character.
|
3 | 18 | *
|
4 |
| - * Regex doesn't support recursion (at least not the JS-flavored version). |
5 |
| - * So we have to use a tiny state machine to keep track of paren placement. |
| 19 | + * Regex doesn't support recursion (at least not the JS-flavored version), |
| 20 | + * so we have to use a tiny state machine to keep track of paren placement. |
6 | 21 | *
|
7 | 22 | * Expected behavior using commas:
|
8 | 23 | * var(--a, 0 0 1px rgb(0, 0, 0)), 0 0 1px rgb(0, 0, 0)
|
|
11 | 26 | * ╰──────────────┴──┴───────────── Ignored b/c inside >= 1 levels of parens
|
12 | 27 | */
|
13 | 28 | export function segment(input: string, separator: string) {
|
14 |
| - // Stack of characters to close open brackets. Appending to a string because |
15 |
| - // it's faster than an array of strings. |
16 |
| - let closingBracketStack = '' |
| 29 | + // SAFETY: We can use an index into a shared buffer because this function is |
| 30 | + // synchronous, non-recursive, and runs in a single-threaded environment. |
| 31 | + let stackPos = 0 |
17 | 32 | let parts: string[] = []
|
18 | 33 | let lastPos = 0
|
19 | 34 |
|
| 35 | + let separatorCode = separator.charCodeAt(0) |
| 36 | + |
20 | 37 | for (let idx = 0; idx < input.length; idx++) {
|
21 |
| - let char = input[idx] |
| 38 | + let char = input.charCodeAt(idx) |
22 | 39 |
|
23 |
| - if (closingBracketStack.length === 0 && char === separator) { |
| 40 | + if (stackPos === 0 && char === separatorCode) { |
24 | 41 | parts.push(input.slice(lastPos, idx))
|
25 | 42 | lastPos = idx + 1
|
26 | 43 | continue
|
27 | 44 | }
|
28 | 45 |
|
29 | 46 | switch (char) {
|
30 |
| - case '\\': |
| 47 | + case BACKSLASH: |
31 | 48 | // The next character is escaped, so we skip it.
|
32 | 49 | idx += 1
|
33 | 50 | break
|
34 |
| - case '(': |
35 |
| - closingBracketStack += ')' |
| 51 | + case OPEN_PAREN: |
| 52 | + closingBracketStack[stackPos] = CLOSE_PAREN |
| 53 | + stackPos++ |
36 | 54 | break
|
37 |
| - case '[': |
38 |
| - closingBracketStack += ']' |
| 55 | + case OPEN_BRACKET: |
| 56 | + closingBracketStack[stackPos] = CLOSE_BRACKET |
| 57 | + stackPos++ |
39 | 58 | break
|
40 |
| - case '{': |
41 |
| - closingBracketStack += '}' |
| 59 | + case OPEN_CURLY: |
| 60 | + closingBracketStack[stackPos] = CLOSE_CURLY |
| 61 | + stackPos++ |
42 | 62 | break
|
43 |
| - case ')': |
44 |
| - case ']': |
45 |
| - case '}': |
46 |
| - if ( |
47 |
| - closingBracketStack.length > 0 && |
48 |
| - char === closingBracketStack[closingBracketStack.length - 1] |
49 |
| - ) { |
50 |
| - closingBracketStack = closingBracketStack.slice(0, closingBracketStack.length - 1) |
| 63 | + case CLOSE_BRACKET: |
| 64 | + case CLOSE_CURLY: |
| 65 | + case CLOSE_PAREN: |
| 66 | + if (stackPos > 0 && char === closingBracketStack[stackPos - 1]) { |
| 67 | + // SAFETY: The buffer does not need to be mutated because the stack is |
| 68 | + // only ever read from or written to its current position. Its current |
| 69 | + // position is only ever incremented after writing to it, meaning that |
| 70 | + // the buffer can be dirty for the next use and still be correct since |
| 71 | + // reading/writing always starts at position `0`. |
| 72 | + stackPos-- |
51 | 73 | }
|
52 | 74 | break
|
53 | 75 | }
|
|
0 commit comments