Skip to content

Commit 42c5bda

Browse files
authored
chore(compass-assistant): add all eval cases, tags and CSV conversion script COMPASS-9823 COMPASS-9758 (#7304)
1 parent ac9a6d7 commit 42c5bda

File tree

11 files changed

+655
-85
lines changed

11 files changed

+655
-85
lines changed

package-lock.json

Lines changed: 96 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test/eval-cases/eval_cases.csv

packages/compass-assistant/package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@
4848
"test-watch": "npm run test -- --watch",
4949
"test-ci": "npm run test-cov",
5050
"test-ci-electron": "npm run test-electron",
51-
"reformat": "npm run eslint . -- --fix && npm run prettier -- --write ."
51+
"reformat": "npm run eslint . -- --fix && npm run prettier -- --write .",
52+
"convert-eval-cases": "ts-node scripts/convert-csv-to-eval-cases.ts && npm run reformat",
53+
"eval": "braintrust eval test/assistant.eval.ts --verbose"
5254
},
5355
"dependencies": {
5456
"@ai-sdk/openai": "^2.0.4",
@@ -67,6 +69,7 @@
6769
"use-sync-external-store": "^1.5.0"
6870
},
6971
"devDependencies": {
72+
"@fast-csv/parse": "^5.0.5",
7073
"@mongodb-js/eslint-config-compass": "^1.4.9",
7174
"@mongodb-js/mocha-config-compass": "^1.7.1",
7275
"@mongodb-js/prettier-config-compass": "^1.2.8",
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#!/usr/bin/env ts-node
2+
/* eslint-disable no-console */
3+
// eslint-disable-next-line @typescript-eslint/no-restricted-imports
4+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
5+
// eslint-disable-next-line @typescript-eslint/no-restricted-imports
6+
import { join, resolve } from 'path';
7+
import { parse } from '@fast-csv/parse';
8+
import type { SimpleEvalCase } from '../test/assistant.eval';
9+
10+
/**
 * Shape of one parsed row of the eval-cases CSV, keyed by column header.
 *
 * The keys are copied from the Compass Assistant PD Eval Cases sheet. They
 * have to match the header cells exactly, including the embedded newlines,
 * because the CSV is parsed with `headers: true` and every cell is looked up
 * by these strings.
 */
type CSVRow = {
  'Your Name': string;
  'Interaction Type\n(can add other types)': string;
  'Input\nHighlighting key: \nHardcoded\n\nContextual passed from client to assistant\n\nUser-entered': string;
  'Expected Output\n(target 100-200 words, okay to go over if needed)': string;
  'Expected Links\n(comma separated please)': string;
  Notes: string;
};
19+
20+
/**
 * Maps a marker that may appear in a row's "Interaction Type" cell to the tag
 * attached to the generated eval case.
 *
 * Markers are matched as case-sensitive substrings of the cell, so one cell
 * can yield several tags. Distinct markers may share a tag (`TLS` and `SSL`
 * both produce `tls-ssl`).
 */
const interactionTypeTags = {
  'End-User Input Only': 'end-user-input',
  'Connection Error': 'connection-error',
  'DNS Error': 'dns-error',
  'Explain Plan': 'explain-plan',
  'Proactive Perf': 'proactive-performance-insights',
  'General network error': 'general-network-error',
  OIDC: 'oidc',
  TLS: 'tls-ssl',
  SSL: 'tls-ssl',
} as const;
31+
32+
/**
 * Prepares free-form text for embedding inside a template literal in
 * generated source code, flattening it to a single normalized line.
 *
 * Backslashes, backticks and `${` are escaped. `\n` and `\r\n` line breaks
 * become the two-character sequence `\n`. Zero-width and other invisible
 * characters are dropped, any remaining non-space whitespace becomes a space,
 * runs of whitespace collapse to one space, and the result is trimmed.
 *
 * Single quotes are NOT escaped, so the result is only safe as-is inside a
 * template literal.
 *
 * @param str - Raw text, e.g. a spreadsheet cell.
 * @returns The escaped, whitespace-normalized text.
 */
function escapeString(str: string): string {
  return (
    str
      // Backslashes go first so the escapes added below are not doubled.
      .replace(/\\/g, '\\\\')
      .replace(/`/g, '\\`')
      // Keep `${` from starting an interpolation in the generated literal.
      .replace(/\$\{/g, '\\${')
      // Encode line breaks as a literal `\n` escape sequence.
      .replace(/\r?\n/g, '\\n')
      // Remove zero-width spaces and other invisible characters.
      .replace(/[\u200B-\u200D\uFEFF\u2028\u2029]/g, '')
      // Replace all whitespace except normal spaces (tabs, lone CR, ...) with spaces.
      .replace(/[^\S ]/g, ' ')
      // Collapse multiple spaces.
      .replace(/\s+/g, ' ')
      .trim()
  );
}
43+
44+
/**
 * Renders eval cases as the source text of a TypeScript module that exports
 * them as `generatedEvalCases`.
 *
 * `input` and `expected` are emitted as template literals. `expectedSources`
 * and `tags` are emitted as arrays of single-quoted strings and are left out
 * entirely when missing or empty. The emitted layout is only roughly
 * formatted; the output is presumably normalized by a formatter afterwards.
 *
 * @param cases - Eval cases to serialize, in output order.
 * @returns The complete contents of the generated module.
 */
function generateEvalCaseFile(cases: SimpleEvalCase[]): string {
  // escapeString targets template literals and leaves `'` untouched, so values
  // placed in single-quoted literals need that one extra escape to stay valid.
  const quote = (value: string): string =>
    `'${escapeString(value).replace(/'/g, "\\'")}'`;

  // Renders `name: [ ... ],` on its own lines, or nothing for an absent/empty list.
  const renderList = (name: string, values?: readonly string[]): string =>
    values && values.length > 0
      ? `\n    ${name}: [\n      ${values
          .map((value) => quote(value))
          .join(',\n      ')},\n    ],`
      : '';

  const caseDefinitions = cases
    .map(
      (evalCase) =>
        '  {\n' +
        `    input: \`${escapeString(evalCase.input)}\`,\n` +
        `    expected: \`${escapeString(evalCase.expected)}\`,` +
        renderList('expectedSources', evalCase.expectedSources) +
        renderList('tags', evalCase.tags) +
        '\n  }'
    )
    .join(',\n');

  // With no cases, emit an empty array rather than a lone `,` (an array hole).
  const arrayBody = caseDefinitions ? `${caseDefinitions},\n` : '';

  return `/** This file is auto-generated by the convert-csv-to-eval-cases script.
Do not modify this file manually. */
import type { SimpleEvalCase } from '../assistant.eval';

export const generatedEvalCases: SimpleEvalCase[] = [
${arrayBody}];
`;
}
79+
80+
/** Element type of an eval case's `tags` array. */
type EvalCaseTag = NonNullable<SimpleEvalCase['tags']>[number];

/** Parses CSV text into rows keyed by the CSV's header line. */
function parseCSVRows(csvContent: string): Promise<CSVRow[]> {
  // The executor parameters are named so they do not shadow path's `resolve`.
  return new Promise<CSVRow[]>((resolveRows, rejectRows) => {
    const rows: CSVRow[] = [];
    const stream = parse({ headers: true })
      .on('data', (row: CSVRow) => rows.push(row))
      .on('end', () => resolveRows(rows))
      .on('error', rejectRows);

    stream.write(csvContent);
    stream.end();
  });
}

/** Splits a comma-separated cell into trimmed links, keeping only those that start with `http`. */
function parseExpectedSources(rawLinks: string | undefined): string[] {
  if (!rawLinks) {
    return [];
  }
  return rawLinks
    .replace(/\r?\n/g, ' ') // Links may be spread over several lines of the cell
    .split(',')
    .map((link) => link.trim())
    .filter((link) => link.startsWith('http'));
}

/** Collects the tags whose marker occurs in the interaction type, without duplicates. */
function tagsForInteractionType(interactionType: string): EvalCaseTag[] {
  // A Set keeps first-seen order and avoids emitting the same tag twice when
  // several markers map to it (e.g. a cell mentioning both TLS and SSL).
  const tags = new Set<EvalCaseTag>();
  for (const [marker, tag] of Object.entries(interactionTypeTags)) {
    if (interactionType.includes(marker)) {
      tags.add(tag as EvalCaseTag);
    }
  }
  return Array.from(tags);
}

/**
 * Converts `test/eval-cases/eval_cases.csv` (resolved relative to this
 * script's parent directory) into `test/eval-cases/generated-cases.ts`.
 *
 * Rows missing an input, expected output, author name or interaction type are
 * skipped. Progress is logged to the console. If the CSV file does not exist,
 * an error is printed and the process exits with code 1.
 *
 * @returns A promise that resolves once the generated file has been written;
 * it rejects if reading, parsing or writing fails.
 */
async function convertCSVToEvalCases(): Promise<void> {
  const scriptDir = __dirname;
  const csvFilePath = resolve(scriptDir, '../test/eval-cases/eval_cases.csv');
  // Check that the CSV file exists
  if (!existsSync(csvFilePath)) {
    console.error(
      `The CSV file does not exist: ${csvFilePath}. Please import it and try again.`
    );
    process.exit(1);
  }
  const outputDir = resolve(scriptDir, '../test/eval-cases');

  console.log('Converting CSV to eval cases...');
  console.log(`Reading from: ${csvFilePath}`);
  console.log(`Output directory: ${outputDir}`);

  // Ensure output directory exists
  mkdirSync(outputDir, { recursive: true });

  const rows = await parseCSVRows(readFileSync(csvFilePath, 'utf8'));

  const allCases: SimpleEvalCase[] = [];
  let skippedRows = 0;

  for (const row of rows) {
    // `?.` guards against columns that are absent from the parsed row at runtime.
    const input =
      row[
        'Input\nHighlighting key: \nHardcoded\n\nContextual passed from client to assistant\n\nUser-entered'
      ]?.trim();
    const expected =
      row[
        'Expected Output\n(target 100-200 words, okay to go over if needed)'
      ]?.trim();
    const yourName = row['Your Name']?.trim();
    const interactionType =
      row['Interaction Type\n(can add other types)']?.trim();

    if (!input || !expected || !yourName || !interactionType) {
      skippedRows += 1; // Skip incomplete rows, but keep count so it is visible
      continue;
    }

    const expectedSources = parseExpectedSources(
      row['Expected Links\n(comma separated please)']?.trim()
    );

    allCases.push({
      input,
      expected,
      tags: tagsForInteractionType(interactionType),
      // Only include the property when there is at least one source.
      ...(expectedSources.length > 0 ? { expectedSources } : {}),
    });
  }

  console.log(`\nProcessed ${allCases.length} cases`);
  if (skippedRows > 0) {
    console.log(`Skipped ${skippedRows} incomplete rows`);
  }

  // Generate single file with all cases
  const filename = 'generated-cases';
  const filepath = join(outputDir, `${filename}.ts`);

  writeFileSync(filepath, generateEvalCaseFile(allCases), 'utf8');
  console.log(`✓ Generated ${filename}.ts with ${allCases.length} cases`);

  console.log('\n✅ Conversion completed successfully!');
}
183+
184+
// Script entry point: the conversion starts as soon as this module is loaded.
// Any rejection is reported and turned into a non-zero exit code.
convertCSVToEvalCases().catch((error: unknown) => {
  console.error('❌ Conversion failed:', error);
  process.exit(1);
});

export { convertCSVToEvalCases };

0 commit comments

Comments
 (0)