
Commit d6e7367

Merge pull request #16 from supermemoryai/12-18-add_eval_harness_for_repoeval_benchmark
12 18 add eval harness for repoeval benchmark
2 parents d1e84d8 + 7d7f93d commit d6e7367

File tree

11 files changed: +1054, -5 lines


.gitignore

Lines changed: 2 additions & 0 deletions
@@ -20,3 +20,5 @@ yarn-error.log*
 .turbo
 todo.md
 plan.md
+eval/cache
+eval/results

eval/chunkers/ast.ts

Lines changed: 40 additions & 0 deletions
/**
 * AST-aware chunker wrapper for evaluation
 *
 * Wraps the astchunk library for use in the evaluation harness.
 * Uses the built-in contextualizedText for better embedding quality.
 */

import { chunk } from '../../src'

/**
 * Chunk a file using AST-aware chunking and return results
 * in a format compatible with the evaluation harness.
 *
 * @param filepath - Path to the file
 * @param code - Source code content
 * @param maxNws - Maximum non-whitespace (NWS) characters per chunk (default: 1500)
 */
export async function chunkFile(
  filepath: string,
  code: string,
  maxNws: number = 1500,
): Promise<
  Array<{
    id: string
    text: string
    startLine: number
    endLine: number
  }>
> {
  const chunks = await chunk(filepath, code, {
    maxChunkSize: maxNws,
  })

  return chunks.map((c) => ({
    id: `${filepath}:${c.lineRange.start}-${c.lineRange.end}`,
    text: c.contextualizedText,
    startLine: c.lineRange.start,
    endLine: c.lineRange.end,
  }))
}
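
Usage note (not part of the diff): a minimal sketch of how chunkFile is meant to be called, assuming the same Bun/ESM setup as the rest of the harness; the input path below is hypothetical.

import { readFileSync } from 'node:fs'
import { chunkFile } from './chunkers/ast'

// Hypothetical input: any source file from the downloaded benchmark repos.
const filepath = 'deepmind_tracr/tracr/craft/transformers.py'
const code = readFileSync(
  'eval/data/repoeval/repositories/function_level/' + filepath,
  'utf-8',
)

const chunks = await chunkFile(filepath, code) // default budget: 1500 NWS chars
for (const c of chunks) {
  console.log(`${c.id} -> lines ${c.startLine}-${c.endLine}`)
}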

eval/debug_chunks.ts

Lines changed: 77 additions & 0 deletions
import { readFileSync } from 'node:fs'
import { join } from 'node:path'
import { chunk } from '../src'
import { chunkFixed } from './chunkers/fixed'

// Check deepmind_tracr/tracr/craft/transformers.py
// Assume we're looking for lines 100-150
const testFile = join(
  import.meta.dir,
  'data/repoeval/repositories/function_level/deepmind_tracr/tracr/craft/transformers.py',
)
const code = readFileSync(testFile, 'utf-8')
const targetStart = 100
const targetEnd = 150

console.log('File:', testFile)
console.log('Target lines:', targetStart, '-', targetEnd)
console.log('')

// Count non-whitespace (NWS) characters, the size unit both chunkers budget against.
function countNws(text: string): number {
  let count = 0
  for (let i = 0; i < text.length; i++) {
    if (text.charCodeAt(i) > 32) count++
  }
  return count
}

// True when the chunk's line range [chunkStart, chunkEnd] intersects [tStart, tEnd].
function overlaps(
  chunkStart: number,
  chunkEnd: number,
  tStart: number,
  tEnd: number,
): boolean {
  return !(chunkEnd < tStart || chunkStart > tEnd)
}

for (const maxSize of [1500, 1800]) {
  console.log(`\n=== Max chunk size: ${maxSize} ===`)

  const astChunks = await chunk(testFile, code, { maxChunkSize: maxSize })
  const fixedChunks = chunkFixed(code, maxSize)

  console.log('\nAST chunks:')
  for (const c of astChunks) {
    const overlap = overlaps(
      c.lineRange.start,
      c.lineRange.end,
      targetStart,
      targetEnd,
    )
    console.log(
      `  Lines ${c.lineRange.start}-${c.lineRange.end} (${countNws(c.text)} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
    )
  }

  console.log('\nFixed chunks:')
  for (const c of fixedChunks) {
    const overlap = overlaps(c.startLine, c.endLine, targetStart, targetEnd)
    console.log(
      `  Lines ${c.startLine}-${c.endLine} (${c.nwsCount} NWS) ${overlap ? '*** RELEVANT ***' : ''}`,
    )
  }

  const astRelevant = astChunks.filter((c) =>
    overlaps(c.lineRange.start, c.lineRange.end, targetStart, targetEnd),
  )
  const fixedRelevant = fixedChunks.filter((c) =>
    overlaps(c.startLine, c.endLine, targetStart, targetEnd),
  )

  console.log(
    `\nRelevant chunks: AST=${astRelevant.length}, Fixed=${fixedRelevant.length}`,
  )
  console.log(
    `Total chunks: AST=${astChunks.length}, Fixed=${fixedChunks.length}`,
  )
}
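
Note (not part of the diff): eval/chunkers/fixed.ts is among the 11 changed files but is not shown in this excerpt. From its usage above, only the signature chunkFixed(code, maxNws) and the fields startLine, endLine, and nwsCount (plus, presumably, text) are pinned down. A fixed-size, line-based baseline matching that shape might look like the following sketch; the PR's actual implementation may differ.

// Hypothetical sketch of a fixed-size chunker with an NWS budget per chunk.
export function chunkFixed(
  code: string,
  maxNws: number,
): Array<{ text: string; startLine: number; endLine: number; nwsCount: number }> {
  const countNws = (s: string) => {
    let n = 0
    for (let i = 0; i < s.length; i++) if (s.charCodeAt(i) > 32) n++
    return n
  }

  const lines = code.split('\n')
  const out: Array<{ text: string; startLine: number; endLine: number; nwsCount: number }> = []
  let buf: string[] = []
  let nws = 0
  let start = 1 // 1-based line numbers, matching the AST chunker's lineRange

  for (let i = 0; i < lines.length; i++) {
    const lineNws = countNws(lines[i])
    // Flush the current chunk when adding this line would exceed the budget.
    if (buf.length > 0 && nws + lineNws > maxNws) {
      out.push({ text: buf.join('\n'), startLine: start, endLine: i, nwsCount: nws })
      buf = []
      nws = 0
      start = i + 1
    }
    buf.push(lines[i])
    nws += lineNws
  }
  if (buf.length > 0) {
    out.push({ text: buf.join('\n'), startLine: start, endLine: lines.length, nwsCount: nws })
  }
  return out
}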

eval/download.ts

Lines changed: 149 additions & 0 deletions
/**
 * Download RepoEval benchmark data
 *
 * Downloads:
 * 1. Task datasets (queries, ground truth) from the Microsoft CodeT repo
 * 2. Function-level Python repositories for chunking
 */

import { existsSync } from 'node:fs'
import { mkdir, writeFile } from 'node:fs/promises'
import { join } from 'node:path'

const DATA_DIR = join(import.meta.dir, 'data', 'repoeval')
const DATASETS_DIR = join(DATA_DIR, 'datasets')
const REPOS_DIR = join(DATA_DIR, 'repositories', 'function_level')

// Function-level repositories from RepoEval
const REPOS_FUNCTION = [
  'amazon-science_patchcore-inspection',
  'deepmind_tracr',
  'facebookresearch_omnivore',
  'google_lightweight_mmm',
  'lucidrains_imagen-pytorch',
  'maxhumber_redframes',
]

async function downloadAndExtractZip(
  url: string,
  destDir: string,
): Promise<void> {
  console.log(`Downloading from ${url}...`)

  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Failed to download: ${response.statusText}`)
  }

  const arrayBuffer = await response.arrayBuffer()
  const tempZipPath = join(destDir, '_temp.zip')

  await mkdir(destDir, { recursive: true })
  await writeFile(tempZipPath, new Uint8Array(arrayBuffer))

  // Use the system unzip command
  const proc = Bun.spawn(['unzip', '-o', '-q', tempZipPath, '-d', destDir], {
    cwd: destDir,
  })
  await proc.exited

  // Clean up the temp file
  await Bun.spawn(['rm', tempZipPath]).exited

  console.log(`Extracted to ${destDir}`)
}

async function downloadDatasets(): Promise<void> {
  if (existsSync(DATASETS_DIR)) {
    console.log('Datasets already downloaded, skipping...')
    return
  }

  const datasetsUrl =
    'https://github.com/microsoft/CodeT/raw/main/RepoCoder/datasets/datasets.zip'
  await downloadAndExtractZip(datasetsUrl, DATASETS_DIR)
}

async function downloadRepositories(): Promise<void> {
  if (existsSync(REPOS_DIR)) {
    console.log('Repositories already downloaded, skipping...')
    return
  }

  // Using the cleaned version from Veronicium's fork
  const reposUrl =
    'https://github.com/Veronicium/repoeval_debug/raw/main/function_level.zip'
  await downloadAndExtractZip(reposUrl, REPOS_DIR)
}

export interface RepoEvalTask {
  prompt: string
  metadata: {
    task_id: string
    ground_truth: string
    fpath_tuple: string[]
    line_no: number
    lineno: number
    context_start_lineno: number
  }
}

export async function loadTasks(
  contextLength: '1k' | '2k' | '4k' = '2k',
): Promise<RepoEvalTask[]> {
  const fileName = `function_level_completion_${contextLength}_context_codex.test.jsonl`
  const filePath = join(DATASETS_DIR, fileName)

  const content = await Bun.file(filePath).text()
  const lines = content.trim().split('\n')

  const tasks: RepoEvalTask[] = []
  const repo2idx: Record<string, number> = {}

  for (const line of lines) {
    const task = JSON.parse(line) as RepoEvalTask

    // Normalize the task_id format and keep only the function-level repos
    const repo = task.metadata.task_id.replace('--', '_').split('/')[0]
    if (!REPOS_FUNCTION.includes(repo)) continue

    if (!(repo in repo2idx)) {
      repo2idx[repo] = 0
    }

    task.metadata.task_id = task.metadata.task_id
      .replace('--', '_')
      .replace('idx', String(repo2idx[repo]))
    task.metadata.line_no = task.metadata.lineno
    repo2idx[repo]++

    tasks.push(task)
  }

  return tasks
}

export function getReposDir(): string {
  return REPOS_DIR
}

export function getRepos(): string[] {
  return REPOS_FUNCTION
}

export async function download(): Promise<void> {
  console.log('Downloading RepoEval benchmark data...\n')

  await mkdir(DATA_DIR, { recursive: true })

  await downloadDatasets()
  await downloadRepositories()

  console.log('\nDownload complete!')
  console.log(`Data stored in: ${DATA_DIR}`)
}

// Run if executed directly
if (import.meta.main) {
  await download()
}
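
Usage note (not part of the diff): a sketch of how another harness script might consume this module. Joining fpath_tuple onto the repositories directory follows the RepoCoder convention and is an assumption here, not something this file guarantees.

import { join } from 'node:path'
import { download, getRepos, getReposDir, loadTasks } from './download'

// Hypothetical driver: fetch the data, then load the 2k-context task split.
await download()
const tasks = await loadTasks('2k')
console.log(`Loaded ${tasks.length} tasks across ${getRepos().length} repos`)

// Assumption: fpath_tuple is rooted at the function_level repositories dir.
const first = tasks[0]
const sourcePath = join(getReposDir(), ...first.metadata.fpath_tuple)
console.log(first.metadata.task_id, '->', sourcePath)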
