
Commit 9e224f1

add missing eval files: fixed chunker, metrics
1 parent d6e7367 commit 9e224f1

File tree: 3 files changed, +164 −3 lines changed


eval/chunkers/fixed.ts

Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
/**
 * Fixed-size chunker for evaluation baseline
 *
 * Simple line-based chunker that splits code into fixed-size chunks
 * based on non-whitespace character count. Used as a baseline comparison
 * for the AST-aware chunker.
 */

/**
 * Count non-whitespace characters in a string
 */
function countNws(text: string): number {
  let count = 0
  for (let i = 0; i < text.length; i++) {
    if (text.charCodeAt(i) > 32) count++
  }
  return count
}

/**
 * Chunk a file using fixed-size chunking based on NWS character count
 *
 * @param filepath - Path to the file (used for chunk IDs)
 * @param code - Source code content
 * @param maxNws - Maximum NWS characters per chunk (default: 1500)
 */
export async function chunkFile(
  filepath: string,
  code: string,
  maxNws: number = 1500,
): Promise<
  Array<{
    id: string
    text: string
    startLine: number
    endLine: number
  }>
> {
  const lines = code.split('\n')
  const chunks: Array<{
    id: string
    text: string
    startLine: number
    endLine: number
  }> = []

  let currentLines: string[] = []
  let currentNws = 0
  let startLine = 0

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? ''
    const lineNws = countNws(line)

    if (currentNws + lineNws > maxNws && currentLines.length > 0) {
      // Flush current chunk
      const text = currentLines.join('\n')
      const endLine = startLine + currentLines.length - 1
      chunks.push({
        id: `${filepath}:${startLine}-${endLine}`,
        text,
        startLine,
        endLine,
      })

      // Start new chunk
      currentLines = [line]
      currentNws = lineNws
      startLine = i
    } else {
      currentLines.push(line)
      currentNws += lineNws
    }
  }

  // Flush remaining lines
  if (currentLines.length > 0) {
    const text = currentLines.join('\n')
    const endLine = startLine + currentLines.length - 1
    chunks.push({
      id: `${filepath}:${startLine}-${endLine}`,
      text,
      startLine,
      endLine,
    })
  }

  return chunks
}
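
A minimal usage sketch for the new baseline chunker (the file path and source string below are made-up inputs for illustration, not part of the commit):

import { chunkFile } from './chunkers/fixed'

// Hypothetical inputs for illustration only
const filepath = 'src/example.ts'
const code = [
  'function add(a: number, b: number) {',
  '  return a + b',
  '}',
].join('\n')

// With the default budget of 1500 NWS characters, this tiny file fits in one chunk
const chunks = await chunkFile(filepath, code)
console.log(chunks)
// [ { id: 'src/example.ts:0-2', text: '...', startLine: 0, endLine: 2 } ]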

eval/debug_chunks.ts

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 import { readFileSync } from 'node:fs'
 import { join } from 'node:path'
 import { chunk } from '../src'
-import { chunkFixed } from './chunkers/fixed'
+import { chunkFile as chunkFixed } from './chunkers/fixed'
 
 // Check deepmind_tracr/tracr/craft/transformers.py
 // Assume we're looking for lines 100-150
@@ -38,7 +38,7 @@ for (const maxSize of [1500, 1800]) {
   console.log(`\n=== Max chunk size: ${maxSize} ===`)
 
   const astChunks = await chunk(testFile, code, { maxChunkSize: maxSize })
-  const fixedChunks = chunkFixed(code, maxSize)
+  const fixedChunks = await chunkFixed(testFile, code, maxSize)
 
   console.log('\nAST chunks:')
   for (const c of astChunks) {
@@ -57,7 +57,7 @@ for (const maxSize of [1500, 1800]) {
   for (const c of fixedChunks) {
     const overlap = overlaps(c.startLine, c.endLine, targetStart, targetEnd)
     console.log(
-      ` Lines ${c.startLine}-${c.endLine} (${c.nwsCount} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
+      ` Lines ${c.startLine}-${c.endLine} (${countNws(c.text)} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
     )
   }
 
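
For the last hunk to type-check, countNws has to be in scope in eval/debug_chunks.ts. If it isn't already defined or imported there, one option (an assumption, not shown in this diff) is to export the helper from eval/chunkers/fixed.ts and import it alongside the renamed chunker:

// eval/chunkers/fixed.ts — add `export` to the existing helper (assumed change, not in this commit's diff)
export function countNws(text: string): number {
  let count = 0
  for (let i = 0; i < text.length; i++) {
    if (text.charCodeAt(i) > 32) count++
  }
  return count
}

// eval/debug_chunks.ts — import it next to the chunker
import { chunkFile as chunkFixed, countNws } from './chunkers/fixed'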

eval/metrics.ts

Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
/**
 * Retrieval metrics for evaluation
 *
 * Computes precision, recall, and nDCG for retrieval evaluation.
 */

/**
 * Compute precision, recall, and nDCG for a single query
 *
 * @param retrievedIds - Ordered list of retrieved chunk IDs
 * @param relevantSet - Set of relevant (ground truth) chunk IDs
 * @param k - Number of results to consider
 */
export function computeMetrics(
  retrievedIds: string[],
  relevantSet: Set<string>,
  k: number,
): { precision: number; recall: number; ndcg: number } {
  const topK = retrievedIds.slice(0, k)

  // Precision@k: fraction of retrieved that are relevant
  const relevantInTopK = topK.filter((id) => relevantSet.has(id)).length
  const precision = relevantInTopK / k

  // Recall@k: fraction of relevant that are retrieved
  const recall = relevantSet.size > 0 ? relevantInTopK / relevantSet.size : 0

  // nDCG@k: normalized discounted cumulative gain
  const dcg = topK.reduce((sum, id, i) => {
    const rel = relevantSet.has(id) ? 1 : 0
    return sum + rel / Math.log2(i + 2) // i+2 because log2(1) = 0
  }, 0)

  // Ideal DCG: all relevant docs at top
  const idealK = Math.min(k, relevantSet.size)
  const idcg = Array.from({ length: idealK }).reduce<number>(
    (sum, _, i) => sum + 1 / Math.log2(i + 2),
    0,
  )

  const ndcg = idcg > 0 ? dcg / idcg : 0

  return { precision, recall, ndcg }
}

/**
 * Aggregate metrics across multiple queries
 *
 * @param metrics - Array of metric objects
 */
export function aggregateMetrics(
  metrics: Array<{ precision: number; recall: number; ndcg: number }>,
): { precision: number; recall: number; ndcg: number } {
  if (metrics.length === 0) {
    return { precision: 0, recall: 0, ndcg: 0 }
  }

  const sum = metrics.reduce(
    (acc, m) => ({
      precision: acc.precision + m.precision,
      recall: acc.recall + m.recall,
      ndcg: acc.ndcg + m.ndcg,
    }),
    { precision: 0, recall: 0, ndcg: 0 },
  )

  return {
    precision: sum.precision / metrics.length,
    recall: sum.recall / metrics.length,
    ndcg: sum.ndcg / metrics.length,
  }
}
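
A quick usage sketch with a worked example (the chunk IDs and second query's numbers below are illustrative, not from the eval dataset):

import { computeMetrics, aggregateMetrics } from './metrics'

// Hypothetical query: 3 results retrieved, ground truth has 3 relevant chunks
const retrieved = ['a.ts:0-10', 'b.ts:5-20', 'a.ts:11-30']
const relevant = new Set(['a.ts:0-10', 'a.ts:11-30', 'c.ts:0-40'])

const m = computeMetrics(retrieved, relevant, 3)
// precision = 2/3 ≈ 0.667 (two of the three retrieved are relevant)
// recall    = 2/3 ≈ 0.667 (two of the three relevant were retrieved)
// DCG  = 1/log2(2) + 0/log2(3) + 1/log2(4) = 1.5
// IDCG = 1/log2(2) + 1/log2(3) + 1/log2(4) ≈ 2.131
// ndcg ≈ 0.704
console.log(m)

// Aggregation is a plain arithmetic mean of each metric across queries
const overall = aggregateMetrics([m, { precision: 1, recall: 0.5, ndcg: 1 }])
console.log(overall) // { precision: ≈0.833, recall: ≈0.583, ndcg: ≈0.852 }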
