Skip to content

Commit c06cf13

Browse files
committed
setup project structure with tree-sitter and effect
1 parent 905c175 commit c06cf13

File tree

23 files changed

+2110
-4
lines changed

23 files changed

+2110
-4
lines changed

bun.lock

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
"name": "astchunk",
77
"dependencies": {
88
"effect": "^3.19.12",
9+
"tree-sitter-go": "^0.25.0",
10+
"tree-sitter-java": "^0.23.5",
11+
"tree-sitter-javascript": "^0.25.0",
12+
"tree-sitter-python": "^0.25.0",
13+
"tree-sitter-rust": "^0.24.0",
14+
"tree-sitter-typescript": "^0.23.2",
15+
"web-tree-sitter": "^0.26.3",
916
},
1017
"devDependencies": {
1118
"@biomejs/biome": "^2.3.8",
@@ -241,8 +248,12 @@
241248

242249
"lightningcss-win32-x64-msvc": ["[email protected]", "", { "os": "win32", "cpu": "x64" }, "sha512-5g1yc73p+iAkid5phb4oVFMB45417DkRevRbt/El/gKXJk4jid+vPFF/AXbxn05Aky8PapwzZrdJShv5C0avjw=="],
243250

251+
"node-addon-api": ["[email protected]", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
252+
244253
"node-fetch-native": ["[email protected]", "", {}, "sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q=="],
245254

255+
"node-gyp-build": ["[email protected]", "", { "bin": { "node-gyp-build": "bin.js", "node-gyp-build-optional": "optional.js", "node-gyp-build-test": "build-test.js" } }, "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ=="],
256+
246257
"nypm": ["[email protected]", "", { "dependencies": { "citty": "^0.1.6", "consola": "^3.4.2", "pathe": "^2.0.3", "pkg-types": "^2.3.0", "tinyexec": "^1.0.1" }, "bin": { "nypm": "dist/cli.mjs" } }, "sha512-7eM+hpOtrKrBDCh7Ypu2lJ9Z7PNZBdi/8AT3AX8xoCj43BBVHD0hPSTEvMtkMpfs8FCqBGhxB+uToIQimA111g=="],
247258

248259
"ohash": ["[email protected]", "", {}, "sha512-RdR9FQrFwNBNXAr4GixM8YaRZRJ5PUWbKYbE5eOsrwAjJW0q2REGcf79oYPsLyskQCZG1PLN+S/K1V00joZAoQ=="],
@@ -283,6 +294,18 @@
283294

284295
"tree-kill": ["[email protected]", "", { "bin": { "tree-kill": "cli.js" } }, "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A=="],
285296

297+
"tree-sitter-go": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.3.1", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.25.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-APBc/Dq3xz/e35Xpkhb1blu5UgW+2E3RyGWawZSCNcbGwa7jhSQPS8KsUupuzBla8PCo8+lz9W/JDJjmfRa2tw=="],
298+
299+
"tree-sitter-java": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.21.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-Yju7oQ0Xx7GcUT01mUglPP+bYfvqjNCGdxqigTnew9nLGoII42PNVP3bHrYeMxswiCRM0yubWmN5qk+zsg0zMA=="],
300+
301+
"tree-sitter-javascript": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.3.1", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.25.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-1fCbmzAskZkxcZzN41sFZ2br2iqTYP3tKls1b/HKGNPQUVOpsUxpmGxdN/wMqAk3jYZnYBR1dd/y/0avMeU7dw=="],
302+
303+
"tree-sitter-python": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.5.0", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.25.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-eCmJx6zQa35GxaCtQD+wXHOhYqBxEL+bp71W/s3fcDMu06MrtzkVXR437dRrCrbrDbyLuUDJpAgycs7ncngLXw=="],
304+
305+
"tree-sitter-rust": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.22.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-NWemUDf629Tfc90Y0Z55zuwPCAHkLxWnMf2RznYu4iBkkrQl2o/CHGB7Cr52TyN5F1DAx8FmUnDtCy9iUkXZEQ=="],
306+
307+
"tree-sitter-typescript": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.2", "tree-sitter-javascript": "^0.23.1" }, "peerDependencies": { "tree-sitter": "^0.21.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-e04JUUKxTT53/x3Uq1zIL45DoYKVfHH4CZqwgZhPg5qYROl5nQjV+85ruFzFGZxu+QeFVbRTPDRnqL9UbU4VeA=="],
308+
286309
"ts-import-resolver": ["[email protected]", "", { "peerDependencies": { "typescript": ">=4.5.0" }, "optionalPeers": ["typescript"] }, "sha512-282pgr6j6aOvP3P2I6XugDxdBobkpdMmdbWjRjGl5gjPI1p0+oTNGDh1t924t75kRlyIkF65DiwhSIUysmyHQA=="],
287310

288311
"tslib": ["[email protected]", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
@@ -291,8 +314,12 @@
291314

292315
"undici-types": ["[email protected]", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
293316

317+
"web-tree-sitter": ["[email protected]", "", {}, "sha512-JIVgIKFS1w6lejxSntCtsS/QsE/ecTS00en809cMxMPxaor6MvUnQ+ovG8uTTTvQCFosSh4MeDdI5bSGw5SoBw=="],
318+
294319
"yaml": ["[email protected]", "", { "bin": { "yaml": "bin.mjs" } }, "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A=="],
295320

296321
"zlye": ["[email protected]", "", { "dependencies": { "picocolors": "^1.1.1" }, "peerDependencies": { "typescript": ">=4.5.0" }, "optionalPeers": ["typescript"] }, "sha512-fwpeC841X3ElOLYRMKXbwX29pitNrsm6nRNvEhDMrRXDl3BhR2i03Bkr0GNrpyYgZJuEzUsBylXAYzgGPXXOCQ=="],
322+
323+
"tree-sitter-typescript/tree-sitter-javascript": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.21.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-/bnhbrTD9frUYHQTiYnPcxyHORIw157ERBa6dqzaKxvR/x3PC4Yzd+D1pZIMS6zNg2v3a8BZ0oK7jHqsQo9fWA=="],
297324
}
298325
}

package.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@
5353
"module": "./dist/index.js",
5454
"types": "./dist/index.d.ts",
5555
"dependencies": {
56-
"effect": "^3.19.12"
56+
"effect": "^3.19.12",
57+
"tree-sitter-go": "^0.25.0",
58+
"tree-sitter-java": "^0.23.5",
59+
"tree-sitter-javascript": "^0.25.0",
60+
"tree-sitter-python": "^0.25.0",
61+
"tree-sitter-rust": "^0.24.0",
62+
"tree-sitter-typescript": "^0.23.2",
63+
"web-tree-sitter": "^0.26.3"
5764
}
5865
}

src/chunk.ts

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import { Effect } from 'effect'
2+
import { chunk as chunkInternal } from './chunking'
3+
import { extractEntities } from './extract'
4+
import { parseCode } from './parser'
5+
import { detectLanguage } from './parser/languages'
6+
import { buildScopeTree } from './scope'
7+
import type { Chunk, ChunkOptions, Language } from './types'
8+
9+
/**
 * Error raised when a stage of the chunking pipeline fails.
 *
 * Carries a `_tag` discriminant for tag-based matching and keeps the
 * underlying failure (if one was supplied) in `cause`.
 */
export class ChunkingError extends Error {
	/** Discriminant identifying this error type. */
	readonly _tag = 'ChunkingError'
	/** The original error or value that triggered this failure, if any. */
	override readonly cause?: unknown

	/**
	 * @param message - Human-readable description of what failed
	 * @param cause - The underlying error or value, stored unchanged
	 */
	constructor(message: string, cause?: unknown) {
		super(message)
		this.name = 'ChunkingError'
		this.cause = cause
	}
}
22+
23+
/**
 * Error raised when no language can be determined for a file.
 *
 * The offending path is kept in `filepath` and is also embedded in the
 * error message.
 */
export class UnsupportedLanguageError extends Error {
	/** Discriminant identifying this error type. */
	readonly _tag = 'UnsupportedLanguageError'
	/** The path whose language could not be determined. */
	readonly filepath: string

	/**
	 * @param filepath - The path that could not be mapped to a language
	 */
	constructor(filepath: string) {
		super(`Unsupported file type: ${filepath}`)
		this.name = 'UnsupportedLanguageError'
		this.filepath = filepath
	}
}
36+
37+
/**
 * Internal Effect-based implementation of the chunking pipeline.
 *
 * Stages, in order: detect language -> parse -> extract entities ->
 * build scope tree -> chunk. A failure in any stage after language detection
 * is mapped to a `ChunkingError` whose message names the stage and whose
 * `cause` holds the original failure.
 *
 * @param filepath - The file path, used for language detection when
 *   `options.language` is not set
 * @param code - The source code to chunk
 * @param options - Optional chunking configuration (defaults to `{}`)
 * @returns An Effect that succeeds with the chunks, or fails with
 *   `UnsupportedLanguageError` when no language is available, or with
 *   `ChunkingError` when a pipeline stage fails
 */
const chunkEffect = (
	filepath: string,
	code: string,
	options: ChunkOptions = {},
): Effect.Effect<Chunk[], ChunkingError | UnsupportedLanguageError> => {
	return Effect.gen(function* () {
		// Step 1: Detect language (an explicit override wins over detection)
		const language: Language | null =
			options.language ?? detectLanguage(filepath)

		if (!language) {
			return yield* Effect.fail(new UnsupportedLanguageError(filepath))
		}

		// Step 2: Parse the code
		const parseResult = yield* Effect.tryPromise({
			try: () => parseCode(code, language),
			catch: (error: unknown) =>
				new ChunkingError('Failed to parse code', error),
		})

		// Step 3: Extract entities from the AST
		const entities = yield* Effect.mapError(
			extractEntities(parseResult.tree.rootNode, language, code),
			(error: unknown) =>
				new ChunkingError('Failed to extract entities', error),
		)

		// Step 4: Build the scope tree
		const scopeTree = yield* Effect.mapError(
			buildScopeTree(entities),
			(error: unknown) =>
				new ChunkingError('Failed to build scope tree', error),
		)

		// Step 5: Chunk the code
		const chunks = yield* Effect.mapError(
			chunkInternal(
				parseResult.tree.rootNode,
				code,
				scopeTree,
				language,
				options,
			),
			(error: unknown) => new ChunkingError('Failed to chunk code', error),
		)

		// The parser reported an error but still produced a tree: keep the
		// chunks and attach the error info to every chunk's context.
		if (parseResult.error) {
			const errorInfo = parseResult.error
			return chunks.map((c: Chunk) => ({
				...c,
				context: {
					...c.context,
					parseError: errorInfo,
				},
			}))
		}

		return chunks
	})
}
104+
105+
/**
 * Chunk source code into pieces with semantic context
 *
 * This is the main entry point for the astchunk library. It takes source code
 * and returns an array of chunks, each with contextual information about the
 * code's structure.
 *
 * Expected pipeline failures reject the returned promise with the original
 * error instance, so callers can use `instanceof` or the `_tag` field to
 * tell them apart.
 *
 * @param filepath - The file path (used for language detection)
 * @param code - The source code to chunk
 * @param options - Optional chunking configuration
 * @returns Array of chunks with context
 * @throws ChunkingError if chunking fails
 * @throws UnsupportedLanguageError if the file type is not supported
 *
 * @example
 * ```ts
 * import { chunk } from 'astchunk'
 *
 * const chunks = await chunk('src/utils.ts', sourceCode)
 * for (const c of chunks) {
 *   console.log(c.text, c.context)
 * }
 * ```
 */
export async function chunk(
	filepath: string,
	code: string,
	options?: ChunkOptions,
): Promise<Chunk[]> {
	// Effect.runPromise rejects with a FiberFailure wrapper when the effect
	// fails, which would hide the documented error classes from `instanceof`.
	// Surfacing the failure as an Either lets us rethrow the original error.
	const outcome = await Effect.runPromise(
		Effect.either(chunkEffect(filepath, code, options)),
	)

	if (outcome._tag === 'Left') {
		throw outcome.left
	}

	return outcome.right
}
136+
137+
/**
 * Chunk source code synchronously (blocking)
 *
 * **WARNING: Not supported.** This function always throws. The chunking
 * pipeline requires async WASM loading, which cannot run synchronously.
 * Use the async `chunk()` function instead.
 *
 * @param _filepath - The file path (unused)
 * @param _code - The source code (unused)
 * @param _options - Optional chunking configuration (unused)
 * @throws Error Always throws - sync chunking is not supported
 *
 * @deprecated Use `chunk()` instead. A synchronous variant may be added in a
 * future version if there's demand for sync operation with pre-initialized
 * parsers.
 */
export function chunkSync(
	_filepath: string,
	_code: string,
	_options?: ChunkOptions,
): Chunk[] {
	throw new Error(
		'chunkSync is not supported. The chunking pipeline requires async WASM loading. Use chunk() instead.',
	)
}

src/chunker.ts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import { chunk } from './chunk'
2+
import type { Chunk, Chunker, ChunkOptions } from './types'
3+
4+
/**
 * Default options for the chunker.
 *
 * These are the baseline values; options supplied by callers are spread on
 * top of them and therefore take precedence.
 */
const DEFAULT_OPTIONS: ChunkOptions = {
	maxChunkSize: 4096,
	contextMode: 'full',
	siblingDetail: 'signatures',
	filterImports: false,
}
13+
14+
/**
 * Implementation of the Chunker interface
 *
 * Provides a stateful wrapper around the chunk function that:
 * - Stores default options (library defaults overlaid with constructor options)
 * - Stores the filepath that is passed to every chunk call
 */
class ChunkerImpl implements Chunker {
	private readonly filepath: string
	private readonly defaultOptions: ChunkOptions

	/**
	 * @param filepath - The file path passed to `chunk` on every call
	 * @param options - Overrides applied on top of `DEFAULT_OPTIONS`
	 */
	constructor(filepath: string, options: ChunkOptions = {}) {
		this.filepath = filepath
		this.defaultOptions = { ...DEFAULT_OPTIONS, ...options }
	}

	/**
	 * Combine the stored defaults with per-call overrides.
	 * Per-call values win; spreading `undefined` is a no-op.
	 */
	private resolveOptions(overrides?: ChunkOptions): ChunkOptions {
		return { ...this.defaultOptions, ...overrides }
	}

	/**
	 * Chunk source code into pieces with context
	 *
	 * @param source - The source code to chunk
	 * @param options - Optional overrides for chunking options
	 * @returns Promise resolving to array of chunks
	 */
	async chunk(source: string, options?: ChunkOptions): Promise<Chunk[]> {
		return chunk(this.filepath, source, this.resolveOptions(options))
	}

	/**
	 * Stream chunks as they are generated
	 *
	 * @param source - The source code to chunk
	 * @param options - Optional overrides for chunking options
	 * @returns Async iterable of chunks
	 *
	 * TODO: Implement true streaming - for now, this computes the full array
	 * first and then yields its elements one by one
	 */
	async *stream(source: string, options?: ChunkOptions): AsyncIterable<Chunk> {
		const chunks = await chunk(
			this.filepath,
			source,
			this.resolveOptions(options),
		)

		for (const c of chunks) {
			yield c
		}
	}
}
60+
61+
/**
 * Create a new Chunker instance for a specific file
 *
 * The Chunker provides a convenient interface for chunking source code
 * with pre-configured options. It's particularly useful when you need to
 * chunk multiple versions of the same file or want to stream chunks.
 *
 * @param filepath - The file path (used for language detection)
 * @param options - Default options for all chunking operations
 * @returns A Chunker instance
 *
 * @example
 * ```ts
 * import { createChunker } from 'astchunk'
 *
 * const chunker = createChunker('src/utils.ts', { maxChunkSize: 2048 })
 *
 * // Get all chunks at once (asynchronous)
 * const chunks = await chunker.chunk(sourceCode)
 *
 * // Or stream chunks
 * for await (const chunk of chunker.stream(sourceCode)) {
 *   process.stdout.write(chunk.text)
 * }
 * ```
 */
export function createChunker(
	filepath: string,
	options?: ChunkOptions,
): Chunker {
	return new ChunkerImpl(filepath, options)
}
93+
94+
/**
95+
* Re-export the Chunker type for convenience
96+
*/
97+
export type { Chunker } from './types'

0 commit comments

Comments
 (0)