Skip to content

Commit 9da1a3a

Browse files
committed
add tests for parser and chunking
1 parent 7a72135 commit 9da1a3a

File tree

3 files changed

+517
-6
lines changed

3 files changed

+517
-6
lines changed

test/chunking.test.ts

Lines changed: 347 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,347 @@
1+
import { describe, expect, test } from 'bun:test'
2+
import { type Chunk, chunk, createChunker, type Language } from '../src'
3+
import {
4+
countNws,
5+
getNwsCountFromCumsum,
6+
preprocessNwsCumsum,
7+
} from '../src/chunking/nws'
8+
9+
// ============================================================================
10+
// NWS (Non-Whitespace) Preprocessing Tests
11+
// ============================================================================
12+
13+
// Covers the non-whitespace (NWS) helpers: direct counting with `countNws`,
// the cumulative-sum table built by `preprocessNwsCumsum`, and range queries
// answered from that table by `getNwsCountFromCumsum`.
describe('NWS preprocessing', () => {
  test('countNws counts non-whitespace characters', () => {
    expect(countNws('hello')).toBe(5)
    expect(countNws('hello world')).toBe(10)
    expect(countNws(' hello ')).toBe(5)
    // Tabs, newlines, carriage returns and spaces are all whitespace.
    expect(countNws('\t\n\r ')).toBe(0)
    expect(countNws('')).toBe(0)
  })

  test('preprocessNwsCumsum builds cumulative sum array', () => {
    const code = 'ab cd'
    const cumsum = preprocessNwsCumsum(code)

    // cumsum[i] = count of NWS chars in code[0..i-1]
    expect(cumsum[0]).toBe(0) // before any chars
    expect(cumsum[1]).toBe(1) // after 'a'
    expect(cumsum[2]).toBe(2) // after 'ab'
    expect(cumsum[3]).toBe(2) // after 'ab ' (space doesn't count)
    expect(cumsum[4]).toBe(3) // after 'ab c'
    expect(cumsum[5]).toBe(4) // after 'ab cd'
  })

  test('getNwsCountFromCumsum returns O(1) range queries', () => {
    const code = 'function hello() { return 42; }'
    const cumsum = preprocessNwsCumsum(code)

    // Full range must agree with a direct count over the whole string.
    const fullNws = getNwsCountFromCumsum(cumsum, 0, code.length)
    expect(fullNws).toBe(countNws(code))

    // Partial range anchored at the start
    const partialNws = getNwsCountFromCumsum(cumsum, 0, 8) // 'function'
    expect(partialNws).toBe(8)

    // Range with a non-zero start, so the lower bound is actually exercised;
    // the two queries above would still pass if `start` were ignored.
    const innerNws = getNwsCountFromCumsum(cumsum, 9, 16) // 'hello()'
    expect(innerNws).toBe(7)
  })
})
48+
49+
// ============================================================================
50+
// Chunking Tests
51+
// ============================================================================
52+
53+
// Covers the top-level `chunk(filepath, code, options?)` entry point: result
// shape, fidelity to the source text, indexing metadata, the `maxChunkSize`
// and `language` options, and empty / unsupported inputs.
describe('chunk', () => {
  test('chunks simple TypeScript file', async () => {
    const code = `
function greet(name: string): string {
return \`Hello, \${name}!\`
}
`
    const chunks = await chunk('test.ts', code)

    expect(chunks.length).toBeGreaterThan(0)
    // Every chunk is expected to expose these fields; only the first is probed.
    expect(chunks[0]).toHaveProperty('text')
    expect(chunks[0]).toHaveProperty('byteRange')
    expect(chunks[0]).toHaveProperty('lineRange')
    expect(chunks[0]).toHaveProperty('context')
    expect(chunks[0]).toHaveProperty('index')
    expect(chunks[0]).toHaveProperty('totalChunks')
  })

  test('chunks preserve original text via source slicing', async () => {
    const code = `const x = 1
const y = 2
const z = 3`

    const chunks = await chunk('test.ts', code)

    // Reconstruct should match slicing from original.
    // NOTE(review): `String.prototype.slice` indexes UTF-16 code units, so this
    // only equals a byte-offset slice because the fixture is pure ASCII —
    // confirm what unit `byteRange` uses before extending to non-ASCII input.
    for (const c of chunks) {
      const sliced = code.slice(c.byteRange.start, c.byteRange.end)
      expect(c.text).toBe(sliced)
    }
  })

  test('chunks have correct index and totalChunks', async () => {
    const code = `
function a() { return 1 }
function b() { return 2 }
function c() { return 3 }
`
    const chunks = await chunk('test.ts', code)

    // `index` must follow array order and `totalChunks` must match the length.
    const total = chunks.length
    chunks.forEach((c, i) => {
      expect(c.index).toBe(i)
      expect(c.totalChunks).toBe(total)
    })
  })

  test('respects maxChunkSize option', async () => {
    // Create code that would be large
    const functions = Array.from(
      { length: 10 },
      (_, i) => `function fn${i}() { return ${i} }`,
    ).join('\n')

    const chunks = await chunk('test.ts', functions, { maxChunkSize: 100 })

    // With small maxChunkSize, should produce multiple chunks
    expect(chunks.length).toBeGreaterThan(1)

    // Each chunk's NWS count should be reasonable
    for (const c of chunks) {
      const nws = countNws(c.text)
      // Allow some overflow due to atomic nodes
      expect(nws).toBeLessThan(200)
    }
  })

  test('handles empty code', async () => {
    const chunks = await chunk('test.ts', '')
    expect(chunks).toEqual([])
  })

  test('handles code with only whitespace', async () => {
    const chunks = await chunk('test.ts', ' \n\n \t\t ')
    expect(chunks.length).toBe(0)
  })

  test('throws UnsupportedLanguageError for unknown extension', async () => {
    // Only the message is matched here, not the error class.
    await expect(chunk('test.xyz', 'code')).rejects.toThrow(
      'Unsupported file type',
    )
  })

  test('allows language override via options', async () => {
    const code = 'const x = 1'

    // Even with wrong extension, should work with language override
    const chunks = await chunk('test.txt', code, { language: 'typescript' })
    expect(chunks.length).toBeGreaterThan(0)
  })
})
144+
145+
// ============================================================================
146+
// Chunker Factory Tests
147+
// ============================================================================
148+
149+
// Covers the `createChunker(filepath, options?)` factory: the returned object
// can be reused across inputs via `.chunk()` and can yield chunks
// incrementally via `.stream()`.
describe('createChunker', () => {
  test('creates a reusable chunker instance', async () => {
    const chunker = createChunker('test.ts', { maxChunkSize: 500 })

    const code1 = 'const a = 1'
    const code2 = 'const b = 2'

    // The same instance is used for two independent inputs.
    const chunks1 = await chunker.chunk(code1)
    const chunks2 = await chunker.chunk(code2)

    expect(chunks1.length).toBeGreaterThan(0)
    expect(chunks2.length).toBeGreaterThan(0)
  })

  test('chunker.stream yields chunks', async () => {
    const chunker = createChunker('test.ts')
    const code = `
function a() { return 1 }
function b() { return 2 }
`
    // Drain the async iterator into an array so the result can be inspected.
    const chunks: Chunk[] = []
    for await (const c of chunker.stream(code)) {
      chunks.push(c)
    }

    expect(chunks.length).toBeGreaterThan(0)
  })
})
177+
178+
// ============================================================================
179+
// Multi-language Chunking Tests
180+
// ============================================================================
181+
182+
// Table-driven smoke test: for each supported language, chunk a small fixture
// (language selected from the file extension) and check that every resulting
// chunk is non-empty with well-ordered byte and line ranges.
describe('multi-language chunking', () => {
  // `lang` is used only in the test title; `ext` is what drives detection.
  const testCases: { lang: Language; ext: string; code: string }[] = [
    {
      lang: 'typescript',
      ext: 'ts',
      code: `
interface User {
name: string
age: number
}

function greet(user: User): string {
return \`Hello, \${user.name}!\`
}
`,
    },
    {
      lang: 'javascript',
      ext: 'js',
      code: `
class Calculator {
add(a, b) {
return a + b
}

subtract(a, b) {
return a - b
}
}
`,
    },
    {
      lang: 'python',
      ext: 'py',
      // Indentation is significant in Python, so the fixture must keep it.
      code: `
class Calculator:
    def add(self, a, b):
        return a + b

    def subtract(self, a, b):
        return a - b
`,
    },
    {
      lang: 'rust',
      ext: 'rs',
      code: `
fn main() {
println!("Hello, world!");
}

fn add(a: i32, b: i32) -> i32 {
a + b
}
`,
    },
    {
      lang: 'go',
      ext: 'go',
      code: `
package main

func main() {
fmt.Println("Hello, world!")
}

func add(a, b int) int {
return a + b
}
`,
    },
    {
      lang: 'java',
      ext: 'java',
      code: `
public class Main {
public static void main(String[] args) {
System.out.println("Hello, world!");
}

public static int add(int a, int b) {
return a + b;
}
}
`,
    },
  ]

  // One test per language so a failure names the language that broke.
  for (const { lang, ext, code } of testCases) {
    test(`chunks ${lang} code correctly`, async () => {
      const chunks = await chunk(`test.${ext}`, code)

      expect(chunks.length).toBeGreaterThan(0)

      // All chunks should have valid structure
      for (const c of chunks) {
        expect(c.text.length).toBeGreaterThan(0)
        expect(c.byteRange.end).toBeGreaterThan(c.byteRange.start)
        expect(c.lineRange.end).toBeGreaterThanOrEqual(c.lineRange.start)
      }
    })
  }
})
285+
286+
// ============================================================================
287+
// Edge Cases
288+
// ============================================================================
289+
290+
// Robustness checks for unusual inputs: a very long single line, deeply nested
// functions, non-ASCII text, and comment-heavy code. Apart from the unicode
// test, each case only asserts that chunking yields at least one chunk.
describe('edge cases', () => {
  test('handles very long single line', async () => {
    // NOTE(review): this expands to `const x = "a""a""a"…`, which is not a
    // valid TypeScript expression, so the test covers a long line that is also
    // malformed — confirm that is the intent.
    const longLine = `const x = ${'"a"'.repeat(1000)}`
    const chunks = await chunk('test.ts', longLine, { maxChunkSize: 100 })

    // Should handle without crashing
    expect(chunks.length).toBeGreaterThan(0)
  })

  test('handles deeply nested code', async () => {
    const nested = `
function outer() {
function inner1() {
function inner2() {
function inner3() {
return 42
}
return inner3()
}
return inner2()
}
return inner1()
}
`
    const chunks = await chunk('test.ts', nested)
    expect(chunks.length).toBeGreaterThan(0)
  })

  test('handles unicode characters', async () => {
    const code = `
const greeting = "こんにちは"
const emoji = "🎉🚀✨"
`
    const chunks = await chunk('test.ts', code)

    expect(chunks.length).toBeGreaterThan(0)
    // Should preserve unicode: multi-byte and astral-plane characters must
    // survive in the concatenated chunk text.
    const allText = chunks.map((c) => c.text).join('')
    expect(allText).toContain('こんにちは')
    expect(allText).toContain('🎉')
  })

  test('handles code with comments', async () => {
    const code = `
// Single line comment
/* Multi-line
comment */
/**
* JSDoc comment
*/
function documented() {
return 1
}
`
    const chunks = await chunk('test.ts', code)
    expect(chunks.length).toBeGreaterThan(0)
  })
})

test/index.test.ts

Lines changed: 0 additions & 6 deletions
This file was deleted.

0 commit comments

Comments
 (0)