|
| 1 | +import { Effect } from 'effect' |
| 2 | +import { chunk as chunkInternal } from './chunking' |
| 3 | +import { extractEntities } from './extract' |
| 4 | +import { parseCode } from './parser' |
| 5 | +import { detectLanguage } from './parser/languages' |
| 6 | +import { buildScopeTree } from './scope' |
| 7 | +import type { Chunk, ChunkOptions, Language } from './types' |
| 8 | + |
/**
 * Failure raised when a chunking run cannot complete.
 *
 * The optional `cause` keeps whatever value triggered the failure so callers
 * can inspect the underlying problem.
 */
export class ChunkingError extends Error {
  /** Literal discriminant identifying this error type. */
  readonly _tag = 'ChunkingError'
  /** Underlying value that triggered this error, when one was supplied. */
  override readonly cause?: unknown

  /**
   * @param message - Human-readable description of what failed
   * @param cause - Optional underlying error or value
   */
  constructor(message: string, cause?: unknown) {
    super(message)
    // The tag literal and the error name are the same string.
    this.name = this._tag
    this.cause = cause
  }
}
| 22 | + |
/**
 * Failure raised when no language can be determined for a file path.
 *
 * The offending path is kept on the error and also embedded in its message.
 */
export class UnsupportedLanguageError extends Error {
  /** Literal discriminant identifying this error type. */
  readonly _tag = 'UnsupportedLanguageError'
  /** File path whose type was not recognised. */
  readonly filepath: string

  /**
   * @param filepath - The path that could not be mapped to a language
   */
  constructor(filepath: string) {
    const description = `Unsupported file type: ${filepath}`
    super(description)
    // The tag literal and the error name are the same string.
    this.name = this._tag
    this.filepath = filepath
  }
}
| 36 | + |
/**
 * Effect-based core of the chunking pipeline (module-internal).
 *
 * Stages, in order: resolve language -> parse -> extract entities ->
 * build scope tree -> chunk -> attach recoverable parse error (if any).
 *
 * Fails with `UnsupportedLanguageError` when no language is given in
 * `options` and none can be detected from `filepath`; a failure in any later
 * stage is wrapped in a `ChunkingError` that keeps the original failure as
 * its `cause`.
 *
 * @param filepath - Path used for language detection when no override is set
 * @param code - Source text to chunk
 * @param options - Chunking configuration; defaults to an empty object
 */
const chunkEffect = (
  filepath: string,
  code: string,
  options: ChunkOptions = {},
): Effect.Effect<Chunk[], ChunkingError | UnsupportedLanguageError> => {
  // Builds the error mapper shared by every stage: wrap the raw failure in a
  // ChunkingError carrying a stage-specific message.
  const failWith =
    (message: string) =>
    (cause: unknown): ChunkingError =>
      new ChunkingError(message, cause)

  return Effect.gen(function* () {
    // Stage 1: an explicit language override wins over detection from the path
    const language: Language | null =
      options.language ?? detectLanguage(filepath)

    if (!language) {
      return yield* Effect.fail(new UnsupportedLanguageError(filepath))
    }

    // Stage 2: parse the source (promise-based)
    const parseResult = yield* Effect.tryPromise({
      try: () => parseCode(code, language),
      catch: failWith('Failed to parse code'),
    })
    const rootNode = parseResult.tree.rootNode

    // Stage 3: pull entities out of the AST
    const entities = yield* Effect.mapError(
      extractEntities(rootNode, language, code),
      failWith('Failed to extract entities'),
    )

    // Stage 4: arrange the entities into a scope tree
    const scopeTree = yield* Effect.mapError(
      buildScopeTree(entities),
      failWith('Failed to build scope tree'),
    )

    // Stage 5: produce the chunks
    const chunks = yield* Effect.mapError(
      chunkInternal(rootNode, code, scopeTree, language, options),
      failWith('Failed to chunk code'),
    )

    // No parse error reported: hand the chunks back untouched
    const parseError = parseResult.error
    if (!parseError) {
      return chunks
    }

    // The parser reported an error but still produced a tree: surface that
    // error on every chunk's context
    return chunks.map((piece: Chunk) => ({
      ...piece,
      context: {
        ...piece.context,
        parseError,
      },
    }))
  })
}
| 104 | + |
/**
 * Chunk source code into pieces with semantic context
 *
 * This is the main entry point for the astchunk library. It takes source code
 * and returns an array of chunks, each with contextual information about the
 * code's structure.
 *
 * Pipeline failures reject the returned promise with the original error
 * object, so `instanceof ChunkingError` / `instanceof UnsupportedLanguageError`
 * (or a check on `_tag`) works in a `catch` block.
 *
 * @param filepath - The file path (used for language detection)
 * @param code - The source code to chunk
 * @param options - Optional chunking configuration
 * @returns Array of chunks with context
 * @throws ChunkingError if chunking fails
 * @throws UnsupportedLanguageError if the file type is not supported
 *
 * @example
 * ```ts
 * import { chunk } from 'astchunk'
 *
 * const chunks = await chunk('src/utils.ts', sourceCode)
 * for (const chunk of chunks) {
 *   console.log(chunk.text, chunk.context)
 * }
 * ```
 */
export async function chunk(
  filepath: string,
  code: string,
  options?: ChunkOptions,
): Promise<Chunk[]> {
  // Effect.runPromise rejects with Effect's FiberFailure wrapper rather than
  // the error the effect failed with. Moving the typed failure into the
  // success channel via Effect.either lets us rethrow the original error
  // object, which is what the @throws contract above promises.
  const outcome = await Effect.runPromise(
    Effect.either(chunkEffect(filepath, code, options)),
  )

  if (outcome._tag === 'Left') {
    throw outcome.left
  }

  return outcome.right
}
| 136 | + |
/**
 * Synchronous counterpart of `chunk()` — placeholder only.
 *
 * **WARNING: Not yet implemented.** Calling this function always throws.
 * Per the error it raises, the chunking pipeline requires async WASM loading,
 * so use the async `chunk()` function instead.
 *
 * @param _filepath - The file path (unused)
 * @param _code - The source code (unused)
 * @param _options - Optional chunking configuration (unused)
 * @throws Error Always throws - sync chunking is not supported
 *
 * @deprecated Use `chunk()` instead. This will be implemented in a future version
 * if there's demand for sync operation with pre-initialized parsers.
 */
export function chunkSync(
  _filepath: string,
  _code: string,
  _options?: ChunkOptions,
): Chunk[] {
  const reason =
    'chunkSync is not supported. The chunking pipeline requires async WASM loading. Use chunk() instead.'
  throw new Error(reason)
}
0 commit comments