@@ -23,8 +23,8 @@ import { embedTexts, topK } from './embeddings'
2323import { aggregateMetrics , computeMetrics } from './metrics'
2424
// Results directory path: 'results' joined onto import.meta.dir.
// NOTE(review): import.meta.dir is not standard ECMAScript — presumably Bun's
// "directory containing the current file"; confirm the target runtime.
2525const RESULTS_DIR = join ( import . meta. dir , 'results' )
/** Top-k cutoffs at which retrieval metrics are computed and reported. */
const K_VALUES = [5, 10]

/** Maximum chunk size, in NWS characters per chunk (NWS presumably means non-whitespace; confirm against the chunker). */
const MAX_CHUNK_SIZE = 1500
2828
2929interface ChunkInfo {
3030 id : string
@@ -34,22 +34,28 @@ interface ChunkInfo {
3434 filepath : string
3535}
3636
/** Retrieval metrics measured at a single cutoff k. */
interface MetricsAtK {
  precision: number
  recall: number
  ndcg: number
}

/** Outcome of one retrieval query (one task). */
interface QueryResult {
  taskId: string
  prompt: string
  /** Line range of the ground-truth location. */
  groundTruthLines: { start: number; end: number }
  groundTruthFile: string
  /** Retrieved chunks with their score and rank. */
  retrievedChunks: Array<{ id: string; score: number; rank: number }>
  relevantChunkIds: string[]
  /** Metrics per k value, keyed by k. */
  metrics: Record<number, MetricsAtK>
}

/** Evaluation result for one chunker on one repository. */
interface EvalResult {
  chunker: 'ast' | 'fixed'
  repo: string
  /** Summary metrics per k value, keyed by k. */
  summary: Record<number, MetricsAtK>
  queryResults: QueryResult[]
  /** Settings the evaluation ran with. */
  config: { kValues: number[]; maxChunkSize: number }
  timestamp: string
}
5561
@@ -161,12 +167,14 @@ async function evaluateRepo(
161167 )
162168 }
163169
170+ const maxK = Math . max ( ...K_VALUES )
171+
164172 for ( let i = 0 ; i < tasks . length ; i ++ ) {
165173 const task = tasks [ i ]
166174 const queryEmb = queryEmbeddings [ i ]
167175
168- // Get top-k chunks
169- const topKResults = topK ( queryEmb , chunkEmbeddings , K )
176+ // Get top-k chunks (use max k to get all we need)
177+ const topKResults = topK ( queryEmb , chunkEmbeddings , maxK )
170178
171179 // Determine ground truth: chunks that overlap with target location
172180 // fpath_tuple is ["repo_name", "path", "to", "file.py"], skip first element
@@ -197,8 +205,11 @@ async function evaluateRepo(
197205 // Get retrieved chunk IDs
198206 const retrievedIds = topKResults . map ( ( r ) => allChunks [ r . index ] . id )
199207
200- // Compute metrics
201- const metrics = computeMetrics ( retrievedIds , relevantSet , K )
208+ // Compute metrics for each k value
209+ const metrics : Record < number , MetricsAtK > = { }
210+ for ( const k of K_VALUES ) {
211+ metrics [ k ] = computeMetrics ( retrievedIds , relevantSet , k )
212+ }
202213
203214 queryResults . push ( {
204215 taskId : task . metadata . task_id ,
@@ -215,29 +226,51 @@ async function evaluateRepo(
215226 } )
216227 }
217228
218- // Aggregate metrics
219- const summary = aggregateMetrics ( queryResults . map ( ( q ) => q . metrics ) )
229+ // Aggregate metrics for each k value
230+ const summary : Record < number , MetricsAtK > = { }
231+ for ( const k of K_VALUES ) {
232+ summary [ k ] = aggregateMetrics ( queryResults . map ( ( q ) => q . metrics [ k ] ) )
233+ }
220234
221235 return {
222236 chunker : chunkerType ,
223237 repo,
224238 summary,
225239 queryResults,
226- config : { k : K , maxChunkSize : MAX_CHUNK_SIZE } ,
240+ config : { kValues : K_VALUES , maxChunkSize : MAX_CHUNK_SIZE } ,
227241 timestamp : new Date ( ) . toISOString ( ) ,
228242 }
229243}
230244
/**
 * Format metrics as a table row for a specific k.
 *
 * The label is left-aligned in 20 columns. nDCG, precision and recall follow
 * in that order, each shown as a percentage with one decimal place,
 * right-aligned in 6 columns, with ` | ` between columns.
 *
 * @param label - Row label, e.g. the chunker name
 * @param metrics - Metric values for a single k; each is multiplied by 100 for display
 * @returns The formatted row, with no indentation and no trailing newline
 */
function formatMetricsRow(label: string, metrics: MetricsAtK): string {
  // Shared cell formatter: fraction -> percent, one decimal, width 6.
  const pct = (value: number): string => (value * 100).toFixed(1).padStart(6)
  return `${label.padEnd(20)} | ${pct(metrics.ndcg)} | ${pct(metrics.precision)} | ${pct(metrics.recall)}`
}
240251
/**
 * Print an AST-vs-Fixed metrics table for every k in K_VALUES.
 *
 * For each k this logs a `k=<k>:` heading, a header row (Chunker, nDCG, P@k,
 * R@k) framed by 50-character dashed rules, one row labelled 'AST' and one
 * labelled 'Fixed', a closing rule, and a blank line.
 *
 * @param astSummary - Metrics for the 'AST' row, keyed by k
 * @param fixedSummary - Metrics for the 'Fixed' row, keyed by k
 * @param indent - Prefix prepended to every non-blank line (default: none)
 */
function printMetricsTable(
  astSummary: Record<number, MetricsAtK>,
  fixedSummary: Record<number, MetricsAtK>,
  indent = '',
): void {
  // The rule and header do not depend on k, so build them once.
  const rule = indent + '-'.repeat(50)
  const header = `${indent}${'Chunker'.padEnd(20)} | ${'nDCG'.padStart(6)} | ${'P@k'.padStart(6)} | ${'R@k'.padStart(6)}`

  for (const k of K_VALUES) {
    console.log(`${indent}k=${k}:`)
    console.log(rule)
    console.log(header)
    console.log(rule)
    console.log(indent + formatMetricsRow('AST', astSummary[k]))
    console.log(indent + formatMetricsRow('Fixed', fixedSummary[k]))
    console.log(rule)
    // The blank separator line is intentionally not indented.
    console.log('')
  }
}
273+
241274async function main ( ) {
242275 console . log ( 'RepoEval Retrieval Evaluation' )
243276 console . log ( '=============================\n' )
@@ -290,14 +323,7 @@ async function main() {
290323
291324 // Print comparison
292325 console . log ( `\n Results for ${ repo } :` )
293- console . log ( ' ' + '-' . repeat ( 50 ) )
294- console . log (
295- ` ${ 'Chunker' . padEnd ( 20 ) } | ${ 'nDCG@5' . padStart ( 6 ) } | ${ 'P@5' . padStart ( 6 ) } | ${ 'R@5' . padStart ( 6 ) } ` ,
296- )
297- console . log ( ' ' + '-' . repeat ( 50 ) )
298- console . log ( ' ' + formatMetrics ( 'AST' , astResult . summary ) )
299- console . log ( ' ' + formatMetrics ( 'Fixed' , fixedResult . summary ) )
300- console . log ( ' ' + '-' . repeat ( 50 ) )
326+ printMetricsTable ( astResult . summary , fixedResult . summary , ' ' )
301327 }
302328
303329 // Step 4: Compute overall summary
@@ -308,36 +334,42 @@ async function main() {
308334 const astResults = allResults . filter ( ( r ) => r . chunker === 'ast' )
309335 const fixedResults = allResults . filter ( ( r ) => r . chunker === 'fixed' )
310336
311- const astOverall = aggregateMetrics ( astResults . map ( ( r ) => r . summary ) )
312- const fixedOverall = aggregateMetrics ( fixedResults . map ( ( r ) => r . summary ) )
337+ // Aggregate metrics for each k value
338+ const astOverall : Record < number , MetricsAtK > = { }
339+ const fixedOverall : Record < number , MetricsAtK > = { }
340+ for ( const k of K_VALUES ) {
341+ astOverall [ k ] = aggregateMetrics ( astResults . map ( ( r ) => r . summary [ k ] ) )
342+ fixedOverall [ k ] = aggregateMetrics ( fixedResults . map ( ( r ) => r . summary [ k ] ) )
343+ }
313344
314- console . log (
315- `\n${ 'Chunker' . padEnd ( 20 ) } | ${ 'nDCG@5' . padStart ( 6 ) } | ${ 'P@5' . padStart ( 6 ) } | ${ 'R@5' . padStart ( 6 ) } ` ,
316- )
317- console . log ( '-' . repeat ( 50 ) )
318- console . log ( formatMetrics ( 'AST' , astOverall ) )
319- console . log ( formatMetrics ( 'Fixed' , fixedOverall ) )
320- console . log ( '-' . repeat ( 50 ) )
321-
322- // Compute improvements
323- const ndcgImprovement =
324- ( ( astOverall . ndcg - fixedOverall . ndcg ) / fixedOverall . ndcg ) * 100
325- const precImprovement =
326- ( ( astOverall . precision - fixedOverall . precision ) / fixedOverall . precision ) *
327- 100
328- const recallImprovement =
329- ( ( astOverall . recall - fixedOverall . recall ) / fixedOverall . recall ) * 100
330-
331- console . log ( `\nImprovement (AST vs Fixed):` )
332- console . log (
333- ` nDCG@5: ${ ndcgImprovement >= 0 ? '+' : '' } ${ ndcgImprovement . toFixed ( 1 ) } %` ,
334- )
335- console . log (
336- ` Precision@5: ${ precImprovement >= 0 ? '+' : '' } ${ precImprovement . toFixed ( 1 ) } %` ,
337- )
338- console . log (
339- ` Recall@5: ${ recallImprovement >= 0 ? '+' : '' } ${ recallImprovement . toFixed ( 1 ) } %` ,
340- )
345+ console . log ( '' )
346+ printMetricsTable ( astOverall , fixedOverall )
347+
348+ // Compute improvements for each k
349+ console . log ( 'Improvement (AST vs Fixed):' )
350+ for ( const k of K_VALUES ) {
351+ const ndcgImprovement =
352+ ( ( astOverall [ k ] . ndcg - fixedOverall [ k ] . ndcg ) / fixedOverall [ k ] . ndcg ) * 100
353+ const precImprovement =
354+ ( ( astOverall [ k ] . precision - fixedOverall [ k ] . precision ) /
355+ fixedOverall [ k ] . precision ) *
356+ 100
357+ const recallImprovement =
358+ ( ( astOverall [ k ] . recall - fixedOverall [ k ] . recall ) /
359+ fixedOverall [ k ] . recall ) *
360+ 100
361+
362+ console . log ( ` k=${ k } :` )
363+ console . log (
364+ ` nDCG: ${ ndcgImprovement >= 0 ? '+' : '' } ${ ndcgImprovement . toFixed ( 1 ) } %` ,
365+ )
366+ console . log (
367+ ` Precision: ${ precImprovement >= 0 ? '+' : '' } ${ precImprovement . toFixed ( 1 ) } %` ,
368+ )
369+ console . log (
370+ ` Recall: ${ recallImprovement >= 0 ? '+' : '' } ${ recallImprovement . toFixed ( 1 ) } %` ,
371+ )
372+ }
341373
342374 // Step 5: Save results
343375 const timestamp = new Date ( ) . toISOString ( ) . replace ( / [: .] / g, '-' )
@@ -351,11 +383,6 @@ async function main() {
351383 overall : {
352384 ast : astOverall ,
353385 fixed : fixedOverall ,
354- improvement : {
355- ndcg : ndcgImprovement ,
356- precision : precImprovement ,
357- recall : recallImprovement ,
358- } ,
359386 } ,
360387 perRepo : Object . fromEntries (
361388 repos . map ( ( repo ) => [
@@ -366,7 +393,7 @@ async function main() {
366393 } ,
367394 ] ) ,
368395 ) ,
369- config : { k : K , maxChunkSize : MAX_CHUNK_SIZE } ,
396+ config : { kValues : K_VALUES , maxChunkSize : MAX_CHUNK_SIZE } ,
370397 timestamp : new Date ( ) . toISOString ( ) ,
371398 } ,
372399 null ,
0 commit comments