Skip to content

Commit afc47bb

Browse files
committed
refactor: clean up dead code, unify defaults, fix NWS preprocessing
1 parent e8c126d commit afc47bb

File tree

18 files changed

+863
-1108
lines changed

18 files changed

+863
-1108
lines changed

src/chunk.ts

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import { Effect } from 'effect'
2-
import { chunk as chunkInternal } from './chunking'
2+
import {
3+
chunk as chunkInternal,
4+
streamChunks as streamChunksInternal,
5+
} from './chunking'
36
import { extractEntities } from './extract'
47
import { parseCode } from './parser'
58
import { detectLanguage } from './parser/languages'
@@ -74,14 +77,15 @@ const chunkEffect = (
7477
new ChunkingError('Failed to build scope tree', error),
7578
)
7679

77-
// Step 5: Chunk the code
80+
// Step 5: Chunk the code (passing filepath for context)
7881
const chunks = yield* Effect.mapError(
7982
chunkInternal(
8083
parseResult.tree.rootNode,
8184
code,
8285
scopeTree,
8386
language,
8487
options,
88+
filepath,
8589
),
8690
(error: unknown) => new ChunkingError('Failed to chunk code', error),
8791
)
@@ -133,3 +137,99 @@ export async function chunk(
133137
): Promise<Chunk[]> {
134138
return Effect.runPromise(chunkEffect(filepath, code, options))
135139
}
140+
141+
/**
142+
* Stream source code chunks as they are generated
143+
*
144+
* This function returns an async generator that yields chunks one at a time,
145+
* which is useful for processing large files without waiting for all chunks
146+
* to be generated.
147+
*
148+
* @param filepath - The file path (used for language detection)
149+
* @param code - The source code to chunk
150+
* @param options - Optional chunking configuration
151+
* @returns Async generator of chunks with context
152+
* @throws ChunkingError if chunking fails
153+
* @throws UnsupportedLanguageError if the file type is not supported
154+
*
155+
* @example
156+
* ```ts
157+
* import { chunkStream } from 'astchunk'
158+
*
159+
* for await (const chunk of chunkStream('src/utils.ts', sourceCode)) {
160+
* console.log(chunk.text, chunk.context)
161+
* }
162+
* ```
163+
*/
164+
export async function* chunkStream(
165+
filepath: string,
166+
code: string,
167+
options?: ChunkOptions,
168+
): AsyncGenerator<Chunk> {
169+
// Detect language (or use override)
170+
const language: Language | null =
171+
options?.language ?? detectLanguage(filepath)
172+
173+
if (!language) {
174+
throw new UnsupportedLanguageError(filepath)
175+
}
176+
177+
// Parse the code
178+
let parseResult: Awaited<ReturnType<typeof parseCode>>
179+
try {
180+
parseResult = await parseCode(code, language)
181+
} catch (error) {
182+
throw new ChunkingError('Failed to parse code', error)
183+
}
184+
185+
// Extract entities from AST
186+
let entities: Awaited<
187+
ReturnType<typeof extractEntities> extends Effect.Effect<infer A, unknown>
188+
? A
189+
: never
190+
>
191+
try {
192+
entities = await Effect.runPromise(
193+
extractEntities(parseResult.tree.rootNode, language, code),
194+
)
195+
} catch (error) {
196+
throw new ChunkingError('Failed to extract entities', error)
197+
}
198+
199+
// Build scope tree
200+
let scopeTree: Awaited<
201+
ReturnType<typeof buildScopeTree> extends Effect.Effect<infer A, unknown>
202+
? A
203+
: never
204+
>
205+
try {
206+
scopeTree = await Effect.runPromise(buildScopeTree(entities))
207+
} catch (error) {
208+
throw new ChunkingError('Failed to build scope tree', error)
209+
}
210+
211+
// Stream chunks from the internal generator, passing filepath for context
212+
const chunkGenerator = streamChunksInternal(
213+
parseResult.tree.rootNode,
214+
code,
215+
scopeTree,
216+
language,
217+
options,
218+
filepath,
219+
)
220+
221+
// Yield chunks, optionally attaching parse error if present
222+
for await (const chunk of chunkGenerator) {
223+
if (parseResult.error) {
224+
yield {
225+
...chunk,
226+
context: {
227+
...chunk.context,
228+
parseError: parseResult.error,
229+
},
230+
}
231+
} else {
232+
yield chunk
233+
}
234+
}
235+
}

src/chunker.ts

Lines changed: 29 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,80 @@
1-
import { chunk } from './chunk'
1+
import { chunk as chunkFn, chunkStream as streamFn } from './chunk'
2+
import { DEFAULT_CHUNK_OPTIONS } from './chunking'
23
import type { Chunk, Chunker, ChunkOptions } from './types'
34

4-
/**
5-
* Default options for the chunker
6-
*/
7-
const DEFAULT_OPTIONS: ChunkOptions = {
8-
maxChunkSize: 4096,
9-
contextMode: 'full',
10-
siblingDetail: 'signatures',
11-
filterImports: false,
12-
}
13-
145
/**
156
* Implementation of the Chunker interface
167
*
17-
* Provides a stateful wrapper around the chunk function that:
18-
* - Stores default options
19-
* - Tracks the filepath for language detection
8+
* Provides a stateful wrapper around the chunk and stream functions that
9+
* stores default options and allows per-call overrides.
2010
*/
2111
class ChunkerImpl implements Chunker {
22-
private readonly filepath: string
2312
private readonly defaultOptions: ChunkOptions
2413

25-
constructor(filepath: string, options: ChunkOptions = {}) {
26-
this.filepath = filepath
27-
this.defaultOptions = { ...DEFAULT_OPTIONS, ...options }
14+
constructor(options: ChunkOptions = {}) {
15+
this.defaultOptions = { ...DEFAULT_CHUNK_OPTIONS, ...options }
2816
}
2917

3018
/**
3119
* Chunk source code into pieces with context
3220
*
33-
* @param source - The source code to chunk
21+
* @param filepath - The file path (used for language detection)
22+
* @param code - The source code to chunk
3423
* @param options - Optional overrides for chunking options
3524
* @returns Promise resolving to array of chunks
3625
*/
37-
async chunk(source: string, options?: ChunkOptions): Promise<Chunk[]> {
26+
async chunk(
27+
filepath: string,
28+
code: string,
29+
options?: ChunkOptions,
30+
): Promise<Chunk[]> {
3831
const mergedOptions = { ...this.defaultOptions, ...options }
39-
return chunk(this.filepath, source, mergedOptions)
32+
return chunkFn(filepath, code, mergedOptions)
4033
}
4134

4235
/**
4336
* Stream chunks as they are generated
4437
*
45-
* @param source - The source code to chunk
38+
* @param filepath - The file path (used for language detection)
39+
* @param code - The source code to chunk
4640
* @param options - Optional overrides for chunking options
4741
* @returns Async iterable of chunks
48-
*
49-
* TODO: Implement true streaming - for now, this just iterates the array
5042
*/
51-
async *stream(source: string, options?: ChunkOptions): AsyncIterable<Chunk> {
43+
async *stream(
44+
filepath: string,
45+
code: string,
46+
options?: ChunkOptions,
47+
): AsyncIterable<Chunk> {
5248
const mergedOptions = { ...this.defaultOptions, ...options }
53-
const chunks = await chunk(this.filepath, source, mergedOptions)
54-
55-
for (const c of chunks) {
56-
yield c
57-
}
49+
yield* streamFn(filepath, code, mergedOptions)
5850
}
5951
}
6052

6153
/**
62-
* Create a new Chunker instance for a specific file
54+
* Create a new Chunker instance with default options
6355
*
6456
* The Chunker provides a convenient interface for chunking source code
6557
* with pre-configured options. It's particularly useful when you need to
66-
* chunk multiple versions of the same file or want to stream chunks.
58+
* chunk multiple files with the same configuration.
6759
*
68-
* @param filepath - The file path (used for language detection)
6960
* @param options - Default options for all chunking operations
7061
* @returns A Chunker instance
7162
*
7263
* @example
7364
* ```ts
7465
* import { createChunker } from 'astchunk'
7566
*
76-
* const chunker = createChunker('src/utils.ts', { maxChunkSize: 2048 })
67+
* const chunker = createChunker({ maxChunkSize: 2048 })
7768
*
7869
* // Chunk the whole file at once (the promise resolves with every chunk)
79-
* const chunks = await chunker.chunk(sourceCode)
70+
* const chunks = await chunker.chunk('src/utils.ts', sourceCode)
8071
*
8172
* // Or stream chunks
82-
* for await (const chunk of chunker.stream(sourceCode)) {
73+
* for await (const chunk of chunker.stream('src/utils.ts', sourceCode)) {
8374
* process.stdout.write(chunk.text)
8475
* }
8576
* ```
8677
*/
87-
export function createChunker(
88-
filepath: string,
89-
options?: ChunkOptions,
90-
): Chunker {
91-
return new ChunkerImpl(filepath, options)
78+
export function createChunker(options?: ChunkOptions): Chunker {
79+
return new ChunkerImpl(options)
9280
}
93-
94-
/**
95-
* Re-export the Chunker type for convenience
96-
*/
97-
export type { Chunker } from './types'

0 commit comments

Comments
 (0)