Skip to content

Commit 410c50d

Browse files
committed
add docs for code-chunk/wasm
1 parent cda366b commit 410c50d

File tree

3 files changed

+131
-23
lines changed

3 files changed

+131
-23
lines changed

packages/code-chunk/README.md

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Uses tree-sitter to split source code at semantic boundaries (functions, classes
1010
- [How It Works](#how-it-works)
1111
- [Installation](#installation)
1212
- [Quickstart](#quickstart)
13+
- [Edge Runtimes (WASM)](#edge-runtimes-wasm)
1314
- [API Reference](#api-reference)
1415
- [License](#license)
1516

@@ -21,6 +22,7 @@ Uses tree-sitter to split source code at semantic boundaries (functions, classes
2122
- **Multi-language**: TypeScript, JavaScript, Python, Rust, Go, Java
2223
- **Streaming**: Process large files incrementally
2324
- **Effect support**: First-class Effect integration
25+
- **Edge-ready**: Works in Cloudflare Workers and other edge runtimes via WASM
2426

2527
## How It Works
2628

@@ -159,6 +161,81 @@ const program = Stream.runForEach(
159161
await Effect.runPromise(program)
160162
```
161163

164+
## Edge Runtimes (WASM)
165+
166+
The default entry point uses Node.js APIs to load tree-sitter WASM files from the filesystem. For edge runtimes, use the `code-chunk/wasm` entry point, which accepts pre-loaded WASM binaries.
167+
168+
### Cloudflare Workers
169+
170+
```typescript
171+
import { createChunker } from 'code-chunk/wasm'
172+
173+
import treeSitterWasm from 'web-tree-sitter/tree-sitter.wasm'
174+
import typescriptWasm from 'tree-sitter-typescript/tree-sitter-typescript.wasm'
175+
import javascriptWasm from 'tree-sitter-javascript/tree-sitter-javascript.wasm'
176+
177+
export default {
178+
async fetch(request: Request): Promise<Response> {
179+
const chunker = await createChunker({
180+
treeSitter: treeSitterWasm,
181+
languages: {
182+
typescript: typescriptWasm,
183+
javascript: javascriptWasm,
184+
},
185+
})
186+
187+
const code = await request.text()
188+
const chunks = await chunker.chunk('input.ts', code)
189+
190+
return Response.json(chunks)
191+
},
192+
}
193+
```
194+
195+
### WasmConfig
196+
197+
The `createChunker` function from `code-chunk/wasm` accepts a `WasmConfig` object:
198+
199+
```typescript
200+
interface WasmConfig {
201+
treeSitter: WasmBinary
202+
languages: Partial<Record<Language, WasmBinary>>
203+
}
204+
205+
type WasmBinary = Uint8Array | ArrayBuffer | Response | string
206+
```
207+
208+
- `treeSitter`: The `web-tree-sitter` runtime WASM binary
209+
- `languages`: Map of language names to their grammar WASM binaries
210+
211+
To minimize bundle size, include only the languages you need.
212+
213+
### WASM Errors
214+
215+
The WASM entry point throws specific errors:
216+
217+
- **`WasmParserError`**: Parser initialization or parsing failed
218+
- **`WasmGrammarError`**: No WASM binary was provided for the requested language
219+
- **`WasmChunkingError`**: Chunking process failed
220+
- **`UnsupportedLanguageError`**: File extension not recognized
221+
222+
```typescript
223+
import {
224+
WasmParserError,
225+
WasmGrammarError,
226+
WasmChunkingError,
227+
UnsupportedLanguageError
228+
} from 'code-chunk/wasm'
229+
230+
try {
231+
const chunks = await chunker.chunk('input.ts', code)
232+
} catch (error) {
233+
if (error instanceof WasmGrammarError) {
234+
console.error(`Language not loaded: ${error.language}`)
235+
}
236+
}
237+
```
238+
162239
## API Reference
163240

164241
### `chunk(filepath, code, options?)`
@@ -202,6 +279,40 @@ Create a reusable chunker instance with default options.
202279

203280
---
204281

282+
### `createChunker(config, options?)` (WASM)
283+
284+
Create a chunker for edge runtimes with pre-loaded WASM binaries.
285+
286+
```typescript
287+
import { createChunker } from 'code-chunk/wasm'
288+
```
289+
290+
**Parameters:**
291+
- `config`: `WasmConfig` with `treeSitter` and `languages` WASM binaries
292+
- `options`: Optional `ChunkOptions`
293+
294+
**Returns:** `Promise<Chunker>`
295+
296+
**Throws:** `WasmParserError`, `WasmGrammarError`, `WasmChunkingError`, `UnsupportedLanguageError`
297+
298+
---
299+
300+
### `WasmParser`
301+
302+
Low-level parser class for edge runtimes. Use this when you need direct access to parsing without chunking.
303+
304+
```typescript
305+
import { WasmParser } from 'code-chunk/wasm'
306+
307+
const parser = new WasmParser(config)
308+
await parser.init()
309+
310+
const result = await parser.parse(code, 'typescript')
311+
console.log(result.tree.rootNode)
312+
```
313+
314+
---
315+
205316
### `formatChunkWithContext(text, context, overlapText?)`
206317

207318
Format chunk text with semantic context prepended. Useful for custom embedding pipelines.

packages/code-chunk/src/wasm.ts

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,35 @@
11
import { Effect } from 'effect'
2-
3-
import type {
4-
Chunk,
5-
Chunker,
6-
ChunkOptions,
7-
Language,
8-
WasmConfig,
9-
} from './types'
10-
112
import {
123
chunk as chunkInternal,
134
DEFAULT_CHUNK_OPTIONS,
145
streamChunks as streamChunksInternal,
156
} from './chunking'
167
import { extractEntities } from './extract'
17-
import { WasmParser } from './parser/wasm'
188
import { detectLanguage } from './parser/languages'
9+
import { WasmParser } from './parser/wasm'
1910
import { buildScopeTree } from './scope'
11+
import type {
12+
Chunk,
13+
Chunker,
14+
ChunkOptions,
15+
Language,
16+
WasmConfig,
17+
} from './types'
2018

19+
export { formatChunkWithContext } from './context/format'
20+
export { detectLanguage, LANGUAGE_EXTENSIONS } from './parser/languages'
21+
export {
22+
createWasmParser,
23+
WasmGrammarError,
24+
WasmParser,
25+
WasmParserError,
26+
} from './parser/wasm'
2127
export type {
2228
Chunk,
2329
ChunkContext,
2430
ChunkEntityInfo,
25-
ChunkOptions,
2631
Chunker,
32+
ChunkOptions,
2733
EntityInfo,
2834
EntityType,
2935
ImportInfo,
@@ -34,15 +40,6 @@ export type {
3440
WasmConfig,
3541
} from './types'
3642

37-
export { formatChunkWithContext } from './context/format'
38-
export {
39-
WasmGrammarError,
40-
WasmParser,
41-
WasmParserError,
42-
createWasmParser,
43-
} from './parser/wasm'
44-
export { detectLanguage, LANGUAGE_EXTENSIONS } from './parser/languages'
45-
4643
export class WasmChunkingError extends Error {
4744
readonly _tag = 'WasmChunkingError'
4845
override readonly cause?: unknown

packages/code-chunk/test/wasm.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1+
import { describe, expect, test } from 'bun:test'
12
import { readFile } from 'node:fs/promises'
23
import { resolve } from 'node:path'
3-
import { describe, expect, test } from 'bun:test'
44

55
import type { WasmConfig } from '../src/types'
66

77
import {
8+
createChunker,
9+
UnsupportedLanguageError,
810
WasmChunkingError,
911
WasmGrammarError,
1012
WasmParser,
1113
WasmParserError,
12-
createChunker,
13-
UnsupportedLanguageError,
1414
} from '../src/wasm'
1515

1616
async function loadWasmBinary(packagePath: string): Promise<Uint8Array> {

0 commit comments

Comments
 (0)