Skip to content

Commit 8c236a7

Browse files
authored
feat: handle super large (65535+ char) token lengths (#88)
Closes #77. Negligible impact on performance (<1%).
1 parent 51bf98d commit 8c236a7

File tree

3 files changed

+128
-43
lines changed

3 files changed

+128
-43
lines changed

src/arena.test.ts

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { describe, test, expect } from 'vitest'
22
import { readFileSync } from 'fs'
3-
import { CSSDataArena, STYLESHEET, STYLE_RULE, DECLARATION, FLAG_IMPORTANT, FLAG_HAS_ERROR } from './arena'
3+
import { CSSDataArena, STYLESHEET, STYLE_RULE, DECLARATION, FLAG_IMPORTANT, FLAG_HAS_ERROR, FLAG_LENGTH_OVERFLOW } from './arena'
44
import { parse } from './parse'
55

66
describe('CSSDataArena', () => {
@@ -237,6 +237,27 @@ describe('CSSDataArena', () => {
237237
arena.set_flags(node, 0)
238238
expect(arena.get_flags(node)).toBe(0)
239239
})
240+
241+
test('should set FLAG_LENGTH_OVERFLOW when length > 65535', () => {
242+
const arena = new CSSDataArena(10)
243+
const node = arena.create_node(DECLARATION, 0, 0, 1, 1)
244+
245+
// Set length to exceed Uint16 max (65535)
246+
arena.set_length(node, 70000)
247+
248+
expect(arena.get_length(node)).toBe(70000) // Should return actual length from Map
249+
expect(arena.has_flag(node, FLAG_LENGTH_OVERFLOW)).toBe(true)
250+
})
251+
252+
test('should not set FLAG_LENGTH_OVERFLOW when length <= 65535', () => {
253+
const arena = new CSSDataArena(10)
254+
const node = arena.create_node(DECLARATION, 0, 0, 1, 1)
255+
256+
arena.set_length(node, 65535)
257+
258+
expect(arena.get_length(node)).toBe(65535)
259+
expect(arena.has_flag(node, FLAG_LENGTH_OVERFLOW)).toBe(false)
260+
})
240261
})
241262

242263
describe('growth tracking', () => {

src/arena.ts

Lines changed: 44 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// CSS Data Arena - Single contiguous ArrayBuffer for all AST nodes
22
//
3-
// Each node occupies 32 bytes with the following layout:
3+
// Each node occupies 36 bytes with the following layout:
44
// Offset | Size | Field
55
// -------|------|-------------
66
// 0 | 1 | type
@@ -14,36 +14,34 @@
1414
// 20 | 2 | contentLength
1515
// 22 | 2 | valueLength
1616
// 24 | 4 | startLine
17-
// 28 | 2 | startColumn
18-
// 30 | 1 | attr_operator (reusing padding)
19-
// 31 | 1 | attr_flags (reusing padding)
17+
// 28 | 4 | startColumn
18+
// 32 | 1 | attr_operator
19+
// 33 | 1 | attr_flags
20+
// 34 | 2 | (padding)
2021
//
2122
// HOW THE ARENA WORKS:
22-
// 1. BYTES_PER_NODE defines the size of each node (32 bytes). The ArrayBuffer size is calculated
23-
// as: capacity × BYTES_PER_NODE. For example, 1024 nodes = 32,768 bytes (32KB).
24-
// Node indices map to byte offsets via: node_offset = node_index × 32.
23+
// 1. BYTES_PER_NODE defines the size of each node (36 bytes). The ArrayBuffer size is calculated
24+
// as: capacity × BYTES_PER_NODE. For example, 1024 nodes = 36,864 bytes (36KB).
25+
// Node indices map to byte offsets via: node_offset = node_index × 36.
2526
//
2627
// 2. We use a single DataView over the ArrayBuffer to read/write different types at specific offsets.
2728
// - Uint8: 1-byte reads/writes for type, flags (e.g., view.getUint8(offset))
28-
// - Uint16: 2-byte reads/writes for length, deltas, column (e.g., view.getUint16(offset, true))
29-
// - Uint32: 4-byte reads/writes for startOffset, pointers, line (e.g., view.getUint32(offset, true))
29+
// - Uint16: 2-byte reads/writes for length, deltas (e.g., view.getUint16(offset, true))
30+
// - Uint32: 4-byte reads/writes for startOffset, pointers, line, column (e.g., view.getUint32(offset, true))
3031
// The 'true' parameter specifies little-endian byte order (native on x86/ARM CPUs).
3132
//
32-
// 3. Padding (2 bytes total at offsets 30-31) ensures memory alignment for performance:
33-
// - Uint32 fields align to 4-byte boundaries (offsets 4, 8, 12, 24)
34-
// - Uint16 fields align to 2-byte boundaries (offsets 2, 16, 18, 20, 22, 28)
33+
// 3. Padding (2 bytes at offsets 34-35) rounds the node size up to a multiple of 4 bytes so every node's fields stay aligned:
34+
// - Uint32 fields align to 4-byte boundaries (offsets 4, 8, 12, 24, 28)
35+
// - Uint16 fields align to 2-byte boundaries (offsets 2, 16, 18, 20, 22)
3536
// Aligned access is faster (single CPU instruction) vs unaligned (multiple memory accesses).
3637
// Modern CPUs penalize unaligned reads/writes, making padding essential for performance.
3738
//
38-
// 4. The padding at offset 30-31 is reused for attribute selector data (attr_operator, attr_flags),
39-
// making efficient use of otherwise wasted bytes. This is a space optimization trick.
40-
//
41-
// 5. Delta offsets (contentStartDelta, valueStartDelta) save memory: instead of storing absolute
39+
// 4. Delta offsets (contentStartDelta, valueStartDelta) save memory: instead of storing absolute
4240
// positions as uint32 (4 bytes), we store relative offsets as uint16 (2 bytes). Removing unused
43-
// lastChild field saved another 4 bytes. This reduced node size from 44→40→36→32 bytes (27%
44-
// smaller than original), saving memory while maintaining performance.
41+
// lastChild field saved another 4 bytes. startColumn was changed from Uint16 to Uint32 to avoid
42+
// overflow on long lines (common in minified CSS). Node size: 44→40→36→32, then back up to 36 bytes.
4543

46-
let BYTES_PER_NODE = 32
44+
let BYTES_PER_NODE = 36
4745

4846
// Node type constants
4947
export const STYLESHEET = 1
@@ -119,6 +117,7 @@ export class CSSDataArena {
119117
private capacity: number // Number of nodes that can fit
120118
private count: number // Number of nodes currently allocated
121119
private growth_count: number // Number of times the arena has grown
120+
private overflow_lengths: Map<number, number> // Stores actual lengths for nodes > 65535 chars
122121

123122
// Growth multiplier when capacity is exceeded
124123
private static readonly GROWTH_FACTOR = 1.3
@@ -136,6 +135,7 @@ export class CSSDataArena {
136135
this.growth_count = 0
137136
this.buffer = new ArrayBuffer(initial_capacity * BYTES_PER_NODE)
138137
this.view = new DataView(this.buffer)
138+
this.overflow_lengths = new Map()
139139
}
140140

141141
// Calculate recommended initial capacity based on CSS source size
@@ -185,6 +185,13 @@ export class CSSDataArena {
185185

186186
// Read length in source
187187
get_length(node_index: number): number {
188+
// Check whether this node has an overflow length stored
189+
if (this.has_flag(node_index, FLAG_LENGTH_OVERFLOW)) {
190+
const overflow_length = this.overflow_lengths.get(node_index)
191+
if (overflow_length !== undefined) {
192+
return overflow_length
193+
}
194+
}
188195
return this.view.getUint16(this.node_offset(node_index) + 2, true)
189196
}
190197

@@ -202,12 +209,12 @@ export class CSSDataArena {
202209

203210
// Read attribute operator (for NODE_SELECTOR_ATTRIBUTE)
204211
get_attr_operator(node_index: number): number {
205-
return this.view.getUint8(this.node_offset(node_index) + 30)
212+
return this.view.getUint8(this.node_offset(node_index) + 32)
206213
}
207214

208215
// Read attribute flags (for NODE_SELECTOR_ATTRIBUTE)
209216
get_attr_flags(node_index: number): number {
210-
return this.view.getUint8(this.node_offset(node_index) + 31)
217+
return this.view.getUint8(this.node_offset(node_index) + 33)
211218
}
212219

213220
// Read first child index (0 = no children)
@@ -227,7 +234,7 @@ export class CSSDataArena {
227234

228235
// Read start column
229236
get_start_column(node_index: number): number {
230-
return this.view.getUint16(this.node_offset(node_index) + 28, true)
237+
return this.view.getUint32(this.node_offset(node_index) + 28, true)
231238
}
232239

233240
// Read value start offset (stored as delta from startOffset, declaration value / at-rule prelude)
@@ -254,14 +261,17 @@ export class CSSDataArena {
254261
this.view.setUint8(this.node_offset(node_index) + 1, flags)
255262
}
256263

257-
// Write start offset in source
258-
set_start_offset(node_index: number, offset: number): void {
259-
this.view.setUint32(this.node_offset(node_index) + 12, offset, true)
260-
}
261-
262264
// Write length in source
263265
set_length(node_index: number, length: number): void {
264-
this.view.setUint16(this.node_offset(node_index) + 2, length, true)
266+
// Uint16 max value is 65535
267+
if (length > 65535) {
268+
this.view.setUint16(this.node_offset(node_index) + 2, 65535, true)
269+
this.set_flag(node_index, FLAG_LENGTH_OVERFLOW)
270+
// Store the actual length in the overflow map
271+
this.overflow_lengths.set(node_index, length)
272+
} else {
273+
this.view.setUint16(this.node_offset(node_index) + 2, length, true)
274+
}
265275
}
266276

267277
// Write content start delta (offset from startOffset)
@@ -276,12 +286,12 @@ export class CSSDataArena {
276286

277287
// Write attribute operator (for NODE_SELECTOR_ATTRIBUTE)
278288
set_attr_operator(node_index: number, operator: number): void {
279-
this.view.setUint8(this.node_offset(node_index) + 30, operator)
289+
this.view.setUint8(this.node_offset(node_index) + 32, operator)
280290
}
281291

282292
// Write attribute flags (for NODE_SELECTOR_ATTRIBUTE)
283293
set_attr_flags(node_index: number, flags: number): void {
284-
this.view.setUint8(this.node_offset(node_index) + 31, flags)
294+
this.view.setUint8(this.node_offset(node_index) + 33, flags)
285295
}
286296

287297
// Write first child index
@@ -294,16 +304,6 @@ export class CSSDataArena {
294304
this.view.setUint32(this.node_offset(node_index) + 8, siblingIndex, true)
295305
}
296306

297-
// Write start line
298-
set_start_line(node_index: number, line: number): void {
299-
this.view.setUint32(this.node_offset(node_index) + 24, line, true)
300-
}
301-
302-
// Write start column
303-
set_start_column(node_index: number, column: number): void {
304-
this.view.setUint16(this.node_offset(node_index) + 28, column, true)
305-
}
306-
307307
// Write value start delta (offset from startOffset, declaration value / at-rule prelude)
308308
set_value_start_delta(node_index: number, delta: number): void {
309309
this.view.setUint16(this.node_offset(node_index) + 18, delta, true)
@@ -342,10 +342,12 @@ export class CSSDataArena {
342342

343343
const offset = node_index * BYTES_PER_NODE
344344
this.view.setUint8(offset, type) // +0: type
345-
this.view.setUint16(offset + 2, length, true) // +2: length
346345
this.view.setUint32(offset + 12, start_offset, true) // +12: startOffset
347346
this.view.setUint32(offset + 24, start_line, true) // +24: startLine
348-
this.view.setUint16(offset + 28, start_column, true) // +28: startColumn
347+
this.view.setUint32(offset + 28, start_column, true) // +28: startColumn
348+
349+
// Use setter method to handle overflow
350+
this.set_length(node_index, length)
349351

350352
return node_index
351353
}

src/parse.test.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
TYPE_SELECTOR,
1313
ATTRIBUTE_SELECTOR,
1414
NESTING_SELECTOR,
15+
URL,
1516
} from './constants'
1617
import { ATTR_OPERATOR_PIPE_EQUAL } from './arena'
1718

@@ -2508,4 +2509,65 @@ describe('Core Nodes', () => {
25082509
})
25092510
})
25102511
})
2512+
2513+
describe('Large inline SVG', () => {
2514+
test('should correctly parse declaration with huge inline SVG background-image', () => {
2515+
// Generate a very long SVG string (> 65535 chars)
2516+
const svgPart = '<svg xmlns="http://www.w3.org/2000/svg"><rect width="100" height="100" fill="red"/></svg>'
2517+
const longSvg = svgPart.repeat(1000) // 89,000 chars
2518+
// Add a second declaration after the huge SVG to test startColumn overflow
2519+
const css = `.test { background-image: url("data:image/svg+xml,${longSvg}"); color: red; }`
2520+
2521+
expect(longSvg.length).toBeGreaterThan(65535) // Verify SVG is long enough
2522+
2523+
const ast = parse(css)
2524+
const rule = ast.first_child!
2525+
const block = rule.block!
2526+
const declaration = block.first_child!
2527+
2528+
// Verify declaration is parsed correctly
2529+
expect(declaration.type).toBe(DECLARATION)
2530+
expect(declaration.property).toBe('background-image')
2531+
2532+
// Verify the full length is accessible (not truncated)
2533+
const declText = `background-image: url("data:image/svg+xml,${longSvg}");`
2534+
expect(declaration.length).toBe(declText.length)
2535+
expect(declaration.length).toBeGreaterThan(65535)
2536+
2537+
// Verify we can access the full declaration text
2538+
expect(declaration.text).toBe(declText)
2539+
expect(declaration.text).toContain('background-image:')
2540+
expect(declaration.text).toContain(longSvg.substring(0, 100))
2541+
expect(declaration.text).toContain(longSvg.substring(longSvg.length - 100))
2542+
2543+
// Verify the value is parsed into nodes
2544+
const urlNode = declaration.first_child!
2545+
expect(urlNode.type).toBe(URL)
2546+
expect(urlNode.name).toBe('url')
2547+
2548+
// Verify the URL node text (full url(...) including function name and parens)
2549+
const expectedUrlText = `url("data:image/svg+xml,${longSvg}")`
2550+
expect(urlNode.text).toBe(expectedUrlText)
2551+
expect(urlNode.text.length).toBe(expectedUrlText.length)
2552+
2553+
// Verify the URL node length matches its text length
2554+
expect(urlNode.length).toBe(urlNode.text.length)
2555+
expect(urlNode.length).toBe(expectedUrlText.length)
2556+
expect(urlNode.length).toBeGreaterThan(65535)
2557+
2558+
// Test startColumn overflow: second declaration starts at column > 65535
2559+
const secondDecl = declaration.next_sibling!
2560+
expect(secondDecl).toBeTruthy()
2561+
expect(secondDecl.type).toBe(DECLARATION)
2562+
expect(secondDecl.property).toBe('color')
2563+
expect(secondDecl.value).toBe('red')
2564+
2565+
// Calculate expected column: '.test { ' + declaration.text + ' ' + 1 (columns are 1-indexed)
2566+
const expectedColumn = '.test { '.length + declText.length + ' '.length + 1
2567+
expect(expectedColumn).toBeGreaterThan(65535)
2568+
2569+
// Verify the column is stored correctly (startColumn is a Uint32, so no overflow handling is needed)
2570+
expect(secondDecl.column).toBe(expectedColumn)
2571+
})
2572+
})
25112573
})

0 commit comments

Comments
 (0)