Skip to content

Commit a0ff523

Browse files
authored
Merge pull request #2 from supermemoryai/12-16-setup_project_structure_with_tree-sitter_and_effect
setup project structure with tree-sitter and effect
2 parents 75ef0ad + c06cf13 commit a0ff523

File tree

24 files changed

+2126
-4
lines changed

24 files changed

+2126
-4
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,5 @@ yarn-error.log*
1818
.env.test.local
1919
.env.production.local
2020
.turbo
21+
todo.md
22+
plan.md

bun.lock

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@
44
"workspaces": {
55
"": {
66
"name": "astchunk",
7+
"dependencies": {
8+
"effect": "^3.19.12",
9+
"tree-sitter-go": "^0.25.0",
10+
"tree-sitter-java": "^0.23.5",
11+
"tree-sitter-javascript": "^0.25.0",
12+
"tree-sitter-python": "^0.25.0",
13+
"tree-sitter-rust": "^0.24.0",
14+
"tree-sitter-typescript": "^0.23.2",
15+
"web-tree-sitter": "^0.26.3",
16+
},
717
"devDependencies": {
818
"@biomejs/biome": "^2.3.8",
919
"@types/bun": "^1.3.4",
@@ -158,6 +168,8 @@
158168

159169
"@oxc-transform/binding-win32-x64-msvc": ["@oxc-transform/[email protected]", "", { "os": "win32", "cpu": "x64" }, "sha512-6QN3DEaEw3eWioWEFRgNsTvYq8czYSnpkjB2za+/WdLN0g5FzOl2ZEfNiPrBWIPnSmjUmDWtWVWcSjwY7fX5/Q=="],
160170

171+
"@standard-schema/spec": ["@standard-schema/[email protected]", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
172+
161173
"@tybys/wasm-util": ["@tybys/[email protected]", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="],
162174

163175
"@types/bun": ["@types/[email protected]", "", { "dependencies": { "bun-types": "1.3.4" } }, "sha512-EEPTKXHP+zKGPkhRLv+HI0UEX8/o+65hqARxLy8Ov5rIxMBPNTjeZww00CIihrIQGEQBYg+0roO5qOnS/7boGA=="],
@@ -196,10 +208,14 @@
196208

197209
"dotenv": ["[email protected]", "", {}, "sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w=="],
198210

211+
"effect": ["[email protected]", "", { "dependencies": { "@standard-schema/spec": "^1.0.0", "fast-check": "^3.23.1" } }, "sha512-7F9RGTrCTC3D7nh9Zw+3VlJWwZgo5k33KA+476BAaD0rKIXKZsY/jQ+ipyhR/Avo239Fi6GqAVFs1mqM1IJ7yg=="],
212+
199213
"escalade": ["[email protected]", "", {}, "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA=="],
200214

201215
"exsolve": ["[email protected]", "", {}, "sha512-LmDxfWXwcTArk8fUEnOfSZpHOJ6zOMUJKOtFLFqJLoKJetuQG874Uc7/Kki7zFLzYybmZhp1M7+98pfMqeX8yA=="],
202216

217+
"fast-check": ["[email protected]", "", { "dependencies": { "pure-rand": "^6.1.0" } }, "sha512-h5+1OzzfCC3Ef7VbtKdcv7zsstUQwUDlYpUTvjeUsJAssPgLn7QzbboPtL5ro04Mq0rPOsMzl7q5hIbRs2wD1A=="],
218+
203219
"fdir": ["[email protected]", "", { "peerDependencies": { "picomatch": "^3 || ^4" }, "optionalPeers": ["picomatch"] }, "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg=="],
204220

205221
"giget": ["[email protected]", "", { "dependencies": { "citty": "^0.1.6", "consola": "^3.4.0", "defu": "^6.1.4", "node-fetch-native": "^1.6.6", "nypm": "^0.6.0", "pathe": "^2.0.3" }, "bin": { "giget": "dist/cli.mjs" } }, "sha512-L5bGsVkxJbJgdnwyuheIunkGatUF/zssUoxxjACCseZYAVbaqdh9Tsmmlkl8vYan09H7sbvKt4pS8GqKLBrEzA=="],
@@ -232,8 +248,12 @@
232248

233249
"lightningcss-win32-x64-msvc": ["[email protected]", "", { "os": "win32", "cpu": "x64" }, "sha512-5g1yc73p+iAkid5phb4oVFMB45417DkRevRbt/El/gKXJk4jid+vPFF/AXbxn05Aky8PapwzZrdJShv5C0avjw=="],
234250

251+
"node-addon-api": ["[email protected]", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
252+
235253
"node-fetch-native": ["[email protected]", "", {}, "sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q=="],
236254

255+
"node-gyp-build": ["[email protected]", "", { "bin": { "node-gyp-build": "bin.js", "node-gyp-build-optional": "optional.js", "node-gyp-build-test": "build-test.js" } }, "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ=="],
256+
237257
"nypm": ["[email protected]", "", { "dependencies": { "citty": "^0.1.6", "consola": "^3.4.2", "pathe": "^2.0.3", "pkg-types": "^2.3.0", "tinyexec": "^1.0.1" }, "bin": { "nypm": "dist/cli.mjs" } }, "sha512-7eM+hpOtrKrBDCh7Ypu2lJ9Z7PNZBdi/8AT3AX8xoCj43BBVHD0hPSTEvMtkMpfs8FCqBGhxB+uToIQimA111g=="],
238258

239259
"ohash": ["[email protected]", "", {}, "sha512-RdR9FQrFwNBNXAr4GixM8YaRZRJ5PUWbKYbE5eOsrwAjJW0q2REGcf79oYPsLyskQCZG1PLN+S/K1V00joZAoQ=="],
@@ -256,6 +276,8 @@
256276

257277
"pkg-types": ["[email protected]", "", { "dependencies": { "confbox": "^0.2.2", "exsolve": "^1.0.7", "pathe": "^2.0.3" } }, "sha512-SIqCzDRg0s9npO5XQ3tNZioRY1uK06lA41ynBC1YmFTmnY6FjUjVt6s4LoADmwoig1qqD0oK8h1p/8mlMx8Oig=="],
258278

279+
"pure-rand": ["[email protected]", "", {}, "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA=="],
280+
259281
"rc9": ["[email protected]", "", { "dependencies": { "defu": "^6.1.4", "destr": "^2.0.3" } }, "sha512-btXCnMmRIBINM2LDZoEmOogIZU7Qe7zn4BpomSKZ/ykbLObuBdvG+mFq11DL6fjH1DRwHhrlgtYWG96bJiC7Cg=="],
260282

261283
"readdirp": ["[email protected]", "", {}, "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg=="],
@@ -272,6 +294,18 @@
272294

273295
"tree-kill": ["[email protected]", "", { "bin": { "tree-kill": "cli.js" } }, "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A=="],
274296

297+
"tree-sitter-go": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.3.1", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.25.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-APBc/Dq3xz/e35Xpkhb1blu5UgW+2E3RyGWawZSCNcbGwa7jhSQPS8KsUupuzBla8PCo8+lz9W/JDJjmfRa2tw=="],
298+
299+
"tree-sitter-java": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.21.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-Yju7oQ0Xx7GcUT01mUglPP+bYfvqjNCGdxqigTnew9nLGoII42PNVP3bHrYeMxswiCRM0yubWmN5qk+zsg0zMA=="],
300+
301+
"tree-sitter-javascript": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.3.1", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.25.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-1fCbmzAskZkxcZzN41sFZ2br2iqTYP3tKls1b/HKGNPQUVOpsUxpmGxdN/wMqAk3jYZnYBR1dd/y/0avMeU7dw=="],
302+
303+
"tree-sitter-python": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.5.0", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.25.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-eCmJx6zQa35GxaCtQD+wXHOhYqBxEL+bp71W/s3fcDMu06MrtzkVXR437dRrCrbrDbyLuUDJpAgycs7ncngLXw=="],
304+
305+
"tree-sitter-rust": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.22.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-NWemUDf629Tfc90Y0Z55zuwPCAHkLxWnMf2RznYu4iBkkrQl2o/CHGB7Cr52TyN5F1DAx8FmUnDtCy9iUkXZEQ=="],
306+
307+
"tree-sitter-typescript": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.2", "tree-sitter-javascript": "^0.23.1" }, "peerDependencies": { "tree-sitter": "^0.21.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-e04JUUKxTT53/x3Uq1zIL45DoYKVfHH4CZqwgZhPg5qYROl5nQjV+85ruFzFGZxu+QeFVbRTPDRnqL9UbU4VeA=="],
308+
275309
"ts-import-resolver": ["[email protected]", "", { "peerDependencies": { "typescript": ">=4.5.0" }, "optionalPeers": ["typescript"] }, "sha512-282pgr6j6aOvP3P2I6XugDxdBobkpdMmdbWjRjGl5gjPI1p0+oTNGDh1t924t75kRlyIkF65DiwhSIUysmyHQA=="],
276310

277311
"tslib": ["[email protected]", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
@@ -280,8 +314,12 @@
280314

281315
"undici-types": ["[email protected]", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
282316

317+
"web-tree-sitter": ["[email protected]", "", {}, "sha512-JIVgIKFS1w6lejxSntCtsS/QsE/ecTS00en809cMxMPxaor6MvUnQ+ovG8uTTTvQCFosSh4MeDdI5bSGw5SoBw=="],
318+
283319
"yaml": ["[email protected]", "", { "bin": { "yaml": "bin.mjs" } }, "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A=="],
284320

285321
"zlye": ["[email protected]", "", { "dependencies": { "picocolors": "^1.1.1" }, "peerDependencies": { "typescript": ">=4.5.0" }, "optionalPeers": ["typescript"] }, "sha512-fwpeC841X3ElOLYRMKXbwX29pitNrsm6nRNvEhDMrRXDl3BhR2i03Bkr0GNrpyYgZJuEzUsBylXAYzgGPXXOCQ=="],
322+
323+
"tree-sitter-typescript/tree-sitter-javascript": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.2", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.21.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-/bnhbrTD9frUYHQTiYnPcxyHORIw157ERBa6dqzaKxvR/x3PC4Yzd+D1pZIMS6zNg2v3a8BZ0oK7jHqsQo9fWA=="],
286324
}
287325
}

package.json

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,15 @@
5151
"./package.json": "./package.json"
5252
},
5353
"module": "./dist/index.js",
54-
"types": "./dist/index.d.ts"
54+
"types": "./dist/index.d.ts",
55+
"dependencies": {
56+
"effect": "^3.19.12",
57+
"tree-sitter-go": "^0.25.0",
58+
"tree-sitter-java": "^0.23.5",
59+
"tree-sitter-javascript": "^0.25.0",
60+
"tree-sitter-python": "^0.25.0",
61+
"tree-sitter-rust": "^0.24.0",
62+
"tree-sitter-typescript": "^0.23.2",
63+
"web-tree-sitter": "^0.26.3"
64+
}
5565
}

src/chunk.ts

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import { Effect } from 'effect'
2+
import { chunk as chunkInternal } from './chunking'
3+
import { extractEntities } from './extract'
4+
import { parseCode } from './parser'
5+
import { detectLanguage } from './parser/languages'
6+
import { buildScopeTree } from './scope'
7+
import type { Chunk, ChunkOptions, Language } from './types'
8+
9+
/**
 * Error describing a failure in one stage of the chunking pipeline.
 *
 * The `_tag` field is a literal discriminant, so the error can be told apart
 * from other tagged errors without relying on `instanceof`. The lower-level
 * failure that triggered it (if any) is kept on `cause`.
 */
export class ChunkingError extends Error {
  /** Literal discriminant identifying this error type. */
  readonly _tag = 'ChunkingError'

  /** The underlying error or value that was wrapped, if one was supplied. */
  override readonly cause?: unknown

  /**
   * @param message - Human-readable description of which step failed
   * @param cause - Optional underlying error or value being wrapped
   */
  constructor(message: string, cause?: unknown) {
    super(message)
    // `Error` would otherwise report the generic name "Error".
    this.name = 'ChunkingError'
    this.cause = cause
  }
}
22+
23+
/**
 * Error raised when no language could be determined for a file.
 *
 * The offending path is exposed on `filepath` and is also embedded in the
 * error message.
 */
export class UnsupportedLanguageError extends Error {
  /** Literal discriminant identifying this error type. */
  readonly _tag = 'UnsupportedLanguageError'

  /** The file path whose language could not be determined. */
  readonly filepath: string

  /**
   * @param filepath - Path of the file that has no supported language
   */
  constructor(filepath: string) {
    super(`Unsupported file type: ${filepath}`)
    // `Error` would otherwise report the generic name "Error".
    this.name = 'UnsupportedLanguageError'
    this.filepath = filepath
  }
}
36+
37+
/**
 * Internal Effect-based implementation of the chunking pipeline.
 *
 * Runs the stages in order: detect language -> parse -> extract entities ->
 * build scope tree -> chunk. A failure in any stage after language detection
 * is wrapped in a `ChunkingError` whose `cause` is the original failure.
 *
 * @param filepath - File path, used for language detection when
 *   `options.language` is not set
 * @param code - The source code to chunk
 * @param options - Chunking configuration; `options.language` overrides
 *   detection from `filepath`
 * @returns An Effect that succeeds with the chunks, or fails with
 *   `UnsupportedLanguageError` when no language is available, or with
 *   `ChunkingError` when a pipeline stage fails
 */
const chunkEffect = (
  filepath: string,
  code: string,
  options: ChunkOptions = {},
): Effect.Effect<Chunk[], ChunkingError | UnsupportedLanguageError> => {
  // Builds the error mapper for one stage, keeping the original failure as `cause`.
  const failedWith =
    (message: string) =>
    (error: unknown): ChunkingError =>
      new ChunkingError(message, error)

  return Effect.gen(function* () {
    // Step 1: Detect language (an explicit option wins over the file path)
    const language: Language | null =
      options.language ?? detectLanguage(filepath)

    if (!language) {
      return yield* Effect.fail(new UnsupportedLanguageError(filepath))
    }

    // Step 2: Parse the code (promise-based, so rejections are captured here)
    const parseResult = yield* Effect.tryPromise({
      try: () => parseCode(code, language),
      catch: failedWith('Failed to parse code'),
    })

    // Step 3: Extract entities from the AST
    const entities = yield* Effect.mapError(
      extractEntities(parseResult.tree.rootNode, language, code),
      failedWith('Failed to extract entities'),
    )

    // Step 4: Build the scope tree from the extracted entities
    const scopeTree = yield* Effect.mapError(
      buildScopeTree(entities),
      failedWith('Failed to build scope tree'),
    )

    // Step 5: Chunk the code
    const chunks = yield* Effect.mapError(
      chunkInternal(
        parseResult.tree.rootNode,
        code,
        scopeTree,
        language,
        options,
      ),
      failedWith('Failed to chunk code'),
    )

    // A parse result may carry an error alongside a usable tree; when it
    // does, copy that error onto every chunk's context instead of failing.
    if (parseResult.error) {
      const errorInfo = parseResult.error
      return chunks.map((c: Chunk) => ({
        ...c,
        context: {
          ...c.context,
          parseError: errorInfo,
        },
      }))
    }

    return chunks
  })
}
104+
105+
/**
 * Chunk source code into pieces with semantic context
 *
 * This is the main entry point for the astchunk library. It takes source code
 * and returns an array of chunks, each with contextual information about the
 * code's structure.
 *
 * Typed pipeline failures are rethrown as the original error instances, so
 * `instanceof ChunkingError` / `instanceof UnsupportedLanguageError` and the
 * `_tag` field work in a `catch` block. (Passing the failing effect straight
 * to `Effect.runPromise` would instead reject with Effect's own wrapper
 * error.)
 *
 * @param filepath - The file path (used for language detection)
 * @param code - The source code to chunk
 * @param options - Optional chunking configuration
 * @returns Array of chunks with context
 * @throws ChunkingError if chunking fails
 * @throws UnsupportedLanguageError if the file type is not supported
 *
 * @example
 * ```ts
 * import { chunk } from 'astchunk'
 *
 * const chunks = await chunk('src/utils.ts', sourceCode)
 * for (const chunk of chunks) {
 *   console.log(chunk.text, chunk.context)
 * }
 * ```
 */
export async function chunk(
  filepath: string,
  code: string,
  options?: ChunkOptions,
): Promise<Chunk[]> {
  // Surface the typed failure as a value so it can be rethrown unwrapped.
  // Unexpected defects still reject through Effect.runPromise as before.
  const outcome = await Effect.runPromise(
    Effect.either(chunkEffect(filepath, code, options)),
  )

  if (outcome._tag === 'Left') {
    throw outcome.left
  }

  return outcome.right
}
136+
137+
/**
 * Chunk source code synchronously (blocking)
 *
 * **WARNING: Not implemented.** This function always throws.
 * The chunking pipeline requires async WASM loading which cannot run
 * synchronously. Use the async `chunk()` function instead.
 *
 * @param _filepath - The file path (unused)
 * @param _code - The source code (unused)
 * @param _options - Optional chunking configuration (unused)
 * @throws Error Always throws - sync chunking is not supported
 *
 * @deprecated Use `chunk()` instead. A synchronous variant may be added in a
 * future version if there's demand for sync operation with pre-initialized
 * parsers.
 */
export function chunkSync(
  _filepath: string,
  _code: string,
  _options?: ChunkOptions,
): Chunk[] {
  throw new Error(
    'chunkSync is not supported. The chunking pipeline requires async WASM loading. Use chunk() instead.',
  )
}

src/chunker.ts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import { chunk } from './chunk'
2+
import type { Chunk, Chunker, ChunkOptions } from './types'
3+
4+
/**
 * Default options for the chunker.
 *
 * These form the base layer of the option merge: options given when a
 * chunker is constructed override them, and per-call options override both.
 */
const DEFAULT_OPTIONS: ChunkOptions = {
  maxChunkSize: 4096,
  contextMode: 'full',
  siblingDetail: 'signatures',
  filterImports: false,
}
13+
14+
/**
 * Implementation of the Chunker interface
 *
 * Provides a stateful wrapper around the `chunk` function that:
 * - Stores default options (library defaults overlaid with constructor options)
 * - Remembers the filepath that is passed to `chunk` on every call
 */
class ChunkerImpl implements Chunker {
  private readonly filepath: string
  private readonly defaultOptions: ChunkOptions

  /**
   * @param filepath - The file path forwarded to `chunk` on every call
   * @param options - Overrides applied on top of `DEFAULT_OPTIONS`
   */
  constructor(filepath: string, options: ChunkOptions = {}) {
    this.filepath = filepath
    this.defaultOptions = { ...DEFAULT_OPTIONS, ...options }
  }

  /**
   * Overlay per-call options on this instance's defaults.
   *
   * @param options - Optional per-call overrides
   * @returns A new options object; neither input is mutated
   */
  private mergeOptions(options?: ChunkOptions): ChunkOptions {
    return { ...this.defaultOptions, ...options }
  }

  /**
   * Chunk source code into pieces with context
   *
   * @param source - The source code to chunk
   * @param options - Optional overrides for chunking options
   * @returns Promise resolving to array of chunks
   */
  async chunk(source: string, options?: ChunkOptions): Promise<Chunk[]> {
    return chunk(this.filepath, source, this.mergeOptions(options))
  }

  /**
   * Stream chunks as they are generated
   *
   * @param source - The source code to chunk
   * @param options - Optional overrides for chunking options
   * @returns Async iterable of chunks
   *
   * TODO: Implement true streaming - for now, all chunks are computed up
   * front and then yielded one by one
   */
  async *stream(source: string, options?: ChunkOptions): AsyncIterable<Chunk> {
    const chunks = await chunk(
      this.filepath,
      source,
      this.mergeOptions(options),
    )

    yield* chunks
  }
}
60+
61+
/**
 * Create a new Chunker instance for a specific file
 *
 * The Chunker provides a convenient interface for chunking source code
 * with pre-configured options. It's particularly useful when you need to
 * chunk multiple versions of the same file or want to stream chunks.
 *
 * @param filepath - The file path (used for language detection)
 * @param options - Default options for all chunking operations
 * @returns A Chunker instance
 *
 * @example
 * ```ts
 * import { createChunker } from 'astchunk'
 *
 * const chunker = createChunker('src/utils.ts', { maxChunkSize: 2048 })
 *
 * // Get all chunks at once (asynchronous - must be awaited)
 * const chunks = await chunker.chunk(sourceCode)
 *
 * // Or stream chunks
 * for await (const chunk of chunker.stream(sourceCode)) {
 *   process.stdout.write(chunk.text)
 * }
 * ```
 */
export function createChunker(
  filepath: string,
  options?: ChunkOptions,
): Chunker {
  return new ChunkerImpl(filepath, options)
}
93+
94+
/**
95+
* Re-export the Chunker type for convenience
96+
*/
97+
export type { Chunker } from './types'

0 commit comments

Comments
 (0)