Skip to content

Commit 8f8c616

Browse files
authored
Merge pull request #10 from supermemoryai/12-17-refactor_clean_up_dead_code_unify_defaults_fix_nws_preprocessing
refactor: clean up dead code, unify defaults, fix NWS preprocessing
2 parents e8c126d + d362568 commit 8f8c616

23 files changed

+5568
-1515
lines changed

biome.json

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,5 +28,23 @@
2828
"enabled": true,
2929
"useIgnoreFile": true,
3030
"clientKind": "git"
31-
}
31+
},
32+
"overrides": [
33+
{
34+
"includes": ["test/**/*.ts"],
35+
"linter": {
36+
"rules": {
37+
"style": {
38+
"noNonNullAssertion": "off"
39+
},
40+
"suspicious": {
41+
"noNonNullAssertedOptionalChain": "off"
42+
},
43+
"correctness": {
44+
"noUnsafeOptionalChaining": "off"
45+
}
46+
}
47+
}
48+
}
49+
]
3250
}

src/chunk.ts

Lines changed: 183 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,19 @@
1-
import { Effect } from 'effect'
2-
import { chunk as chunkInternal } from './chunking'
1+
import { Effect, Stream } from 'effect'
2+
import {
3+
chunk as chunkInternal,
4+
streamChunks as streamChunksInternal,
5+
} from './chunking'
36
import { extractEntities } from './extract'
47
import { parseCode } from './parser'
58
import { detectLanguage } from './parser/languages'
69
import { buildScopeTree } from './scope'
7-
import type { Chunk, ChunkOptions, Language } from './types'
10+
import type {
11+
Chunk,
12+
ChunkOptions,
13+
Language,
14+
ParseResult,
15+
ScopeTree,
16+
} from './types'
817

918
/**
1019
* Error thrown when chunking fails
@@ -74,14 +83,15 @@ const chunkEffect = (
7483
new ChunkingError('Failed to build scope tree', error),
7584
)
7685

77-
// Step 5: Chunk the code
86+
// Step 5: Chunk the code (passing filepath for context)
7887
const chunks = yield* Effect.mapError(
7988
chunkInternal(
8089
parseResult.tree.rootNode,
8190
code,
8291
scopeTree,
8392
language,
8493
options,
94+
filepath,
8595
),
8696
(error: unknown) => new ChunkingError('Failed to chunk code', error),
8797
)
@@ -133,3 +143,172 @@ export async function chunk(
133143
): Promise<Chunk[]> {
134144
return Effect.runPromise(chunkEffect(filepath, code, options))
135145
}
146+
147+
/**
148+
* Prepare the chunking pipeline (parse, extract, build scope tree)
149+
* Returns the parse result, scope tree, and resolved language needed for chunking
150+
*/
151+
const prepareChunking = (
152+
filepath: string,
153+
code: string,
154+
options?: ChunkOptions,
155+
): Effect.Effect<
156+
{ parseResult: ParseResult; scopeTree: ScopeTree; language: Language },
157+
ChunkingError | UnsupportedLanguageError
158+
> => {
159+
return Effect.gen(function* () {
160+
// Step 1: Detect language (or use override)
161+
const language: Language | null =
162+
options?.language ?? detectLanguage(filepath)
163+
164+
if (!language) {
165+
return yield* Effect.fail(new UnsupportedLanguageError(filepath))
166+
}
167+
168+
// Step 2: Parse the code
169+
const parseResult = yield* Effect.tryPromise({
170+
try: () => parseCode(code, language),
171+
catch: (error: unknown) =>
172+
new ChunkingError('Failed to parse code', error),
173+
})
174+
175+
// Step 3: Extract entities from AST
176+
const entities = yield* Effect.mapError(
177+
extractEntities(parseResult.tree.rootNode, language, code),
178+
(error: unknown) =>
179+
new ChunkingError('Failed to extract entities', error),
180+
)
181+
182+
// Step 4: Build scope tree
183+
const scopeTree = yield* Effect.mapError(
184+
buildScopeTree(entities),
185+
(error: unknown) =>
186+
new ChunkingError('Failed to build scope tree', error),
187+
)
188+
189+
return { parseResult, scopeTree, language }
190+
})
191+
}
192+
193+
/**
194+
* Create an Effect Stream that yields chunks
195+
*
196+
* This is the Effect-native streaming API. Use this if you're working
197+
* within the Effect ecosystem and want full composability.
198+
*
199+
* @param filepath - The file path (used for language detection)
200+
* @param code - The source code to chunk
201+
* @param options - Optional chunking configuration
202+
* @returns Effect Stream of chunks with context
203+
*
204+
* @example
205+
* ```ts
206+
* import { chunkStreamEffect } from 'astchunk'
207+
* import { Effect, Stream } from 'effect'
208+
*
209+
* const program = Stream.runForEach(
210+
* chunkStreamEffect('src/utils.ts', sourceCode),
211+
* (chunk) => Effect.log(chunk.text)
212+
* )
213+
*
214+
* Effect.runPromise(program)
215+
* ```
216+
*/
217+
export const chunkStreamEffect = (
218+
filepath: string,
219+
code: string,
220+
options?: ChunkOptions,
221+
): Stream.Stream<Chunk, ChunkingError | UnsupportedLanguageError> => {
222+
return Stream.unwrap(
223+
Effect.map(prepareChunking(filepath, code, options), (prepared) => {
224+
const { parseResult, scopeTree, language } = prepared
225+
226+
// Create stream from the internal generator
227+
return Stream.fromAsyncIterable(
228+
streamChunksInternal(
229+
parseResult.tree.rootNode,
230+
code,
231+
scopeTree,
232+
language,
233+
options,
234+
filepath,
235+
),
236+
(error) => new ChunkingError('Stream iteration failed', error),
237+
).pipe(
238+
// Attach parse error to chunks if present
239+
Stream.map((chunk) =>
240+
parseResult.error
241+
? {
242+
...chunk,
243+
context: {
244+
...chunk.context,
245+
parseError: parseResult.error,
246+
},
247+
}
248+
: chunk,
249+
),
250+
)
251+
}),
252+
)
253+
}
254+
255+
/**
256+
* Stream source code chunks as they are generated
257+
*
258+
* This function returns an async generator that yields chunks one at a time,
259+
* which is useful for processing large files without waiting for all chunks
260+
* to be generated.
261+
*
262+
* @param filepath - The file path (used for language detection)
263+
* @param code - The source code to chunk
264+
* @param options - Optional chunking configuration
265+
* @returns Async generator of chunks with context
266+
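* @throws ChunkingError if preparation (parse, entity extraction, scope tree) fails; errors raised while iterating the internal generator propagate unwrapped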
267+
* @throws UnsupportedLanguageError if the file type is not supported
268+
*
269+
* @example
270+
* ```ts
271+
* import { chunkStream } from 'astchunk'
272+
*
273+
* for await (const chunk of chunkStream('src/utils.ts', sourceCode)) {
274+
* console.log(chunk.text, chunk.context)
275+
* }
276+
* ```
277+
*/
278+
export async function* chunkStream(
279+
filepath: string,
280+
code: string,
281+
options?: ChunkOptions,
282+
): AsyncGenerator<Chunk> {
283+
// Prepare the chunking pipeline
284+
const prepared = await Effect.runPromise(
285+
prepareChunking(filepath, code, options),
286+
)
287+
288+
const { parseResult, scopeTree, language } = prepared
289+
290+
// Stream chunks from the internal generator
291+
const chunkGenerator = streamChunksInternal(
292+
parseResult.tree.rootNode,
293+
code,
294+
scopeTree,
295+
language,
296+
options,
297+
filepath,
298+
)
299+
300+
// Yield each chunk, adding parseResult.error to its context as `parseError` when the parse reported one
301+
for await (const chunk of chunkGenerator) {
302+
if (parseResult.error) {
303+
yield {
304+
...chunk,
305+
context: {
306+
...chunk.context,
307+
parseError: parseResult.error,
308+
},
309+
}
310+
} else {
311+
yield chunk
312+
}
313+
}
314+
}

src/chunker.ts

Lines changed: 29 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,97 +1,80 @@
1-
import { chunk } from './chunk'
1+
import { chunk as chunkFn, chunkStream as streamFn } from './chunk'
2+
import { DEFAULT_CHUNK_OPTIONS } from './chunking'
23
import type { Chunk, Chunker, ChunkOptions } from './types'
34

4-
/**
5-
* Default options for the chunker
6-
*/
7-
const DEFAULT_OPTIONS: ChunkOptions = {
8-
maxChunkSize: 4096,
9-
contextMode: 'full',
10-
siblingDetail: 'signatures',
11-
filterImports: false,
12-
}
13-
145
/**
156
* Implementation of the Chunker interface
167
*
17-
* Provides a stateful wrapper around the chunk function that:
18-
* - Stores default options
19-
* - Tracks the filepath for language detection
8+
* Provides a stateful wrapper around the chunk and stream functions that
9+
* stores default options and allows per-call overrides.
2010
*/
2111
class ChunkerImpl implements Chunker {
22-
private readonly filepath: string
2312
private readonly defaultOptions: ChunkOptions
2413

25-
constructor(filepath: string, options: ChunkOptions = {}) {
26-
this.filepath = filepath
27-
this.defaultOptions = { ...DEFAULT_OPTIONS, ...options }
14+
constructor(options: ChunkOptions = {}) {
15+
this.defaultOptions = { ...DEFAULT_CHUNK_OPTIONS, ...options }
2816
}
2917

3018
/**
3119
* Chunk source code into pieces with context
3220
*
33-
* @param source - The source code to chunk
21+
* @param filepath - The file path (used for language detection)
22+
* @param code - The source code to chunk
3423
* @param options - Optional overrides for chunking options
3524
* @returns Promise resolving to array of chunks
3625
*/
37-
async chunk(source: string, options?: ChunkOptions): Promise<Chunk[]> {
26+
async chunk(
27+
filepath: string,
28+
code: string,
29+
options?: ChunkOptions,
30+
): Promise<Chunk[]> {
3831
const mergedOptions = { ...this.defaultOptions, ...options }
39-
return chunk(this.filepath, source, mergedOptions)
32+
return chunkFn(filepath, code, mergedOptions)
4033
}
4134

4235
/**
4336
* Stream chunks as they are generated
4437
*
45-
* @param source - The source code to chunk
38+
* @param filepath - The file path (used for language detection)
39+
* @param code - The source code to chunk
4640
* @param options - Optional overrides for chunking options
4741
* @returns Async iterable of chunks
48-
*
49-
* TODO: Implement true streaming - for now, this just iterates the array
5042
*/
51-
async *stream(source: string, options?: ChunkOptions): AsyncIterable<Chunk> {
43+
async *stream(
44+
filepath: string,
45+
code: string,
46+
options?: ChunkOptions,
47+
): AsyncIterable<Chunk> {
5248
const mergedOptions = { ...this.defaultOptions, ...options }
53-
const chunks = await chunk(this.filepath, source, mergedOptions)
54-
55-
for (const c of chunks) {
56-
yield c
57-
}
49+
yield* streamFn(filepath, code, mergedOptions)
5850
}
5951
}
6052

6153
/**
62-
* Create a new Chunker instance for a specific file
54+
* Create a new Chunker instance with default options
6355
*
6456
* The Chunker provides a convenient interface for chunking source code
6557
* with pre-configured options. It's particularly useful when you need to
66-
* chunk multiple versions of the same file or want to stream chunks.
58+
* chunk multiple files with the same configuration.
6759
*
68-
* @param filepath - The file path (used for language detection)
6960
* @param options - Default options for all chunking operations
7061
* @returns A Chunker instance
7162
*
7263
* @example
7364
* ```ts
7465
* import { createChunker } from 'astchunk'
7566
*
76-
* const chunker = createChunker('src/utils.ts', { maxChunkSize: 2048 })
67+
* const chunker = createChunker({ maxChunkSize: 2048 })
7768
*
7869
* // Chunk the whole file at once (awaits the full array of chunks)
79-
* const chunks = await chunker.chunk(sourceCode)
70+
* const chunks = await chunker.chunk('src/utils.ts', sourceCode)
8071
*
8172
* // Or stream chunks
82-
* for await (const chunk of chunker.stream(sourceCode)) {
73+
* for await (const chunk of chunker.stream('src/utils.ts', sourceCode)) {
8374
* process.stdout.write(chunk.text)
8475
* }
8576
* ```
8677
*/
87-
export function createChunker(
88-
filepath: string,
89-
options?: ChunkOptions,
90-
): Chunker {
91-
return new ChunkerImpl(filepath, options)
78+
export function createChunker(options?: ChunkOptions): Chunker {
79+
return new ChunkerImpl(options)
9280
}
93-
94-
/**
95-
* Re-export the Chunker type for convenience
96-
*/
97-
export type { Chunker } from './types'

0 commit comments

Comments
 (0)