Skip to content

Commit e7453fe

Browse files
committed
Type fixes for the evals
1 parent a684ce5 commit e7453fe

File tree

1 file changed

+20
-14
lines changed

1 file changed

+20
-14
lines changed

apps/webapp/evals/aiQuery.eval.ts

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ function normalizeQuery(query: string): string {
1818
.toLowerCase();
1919
}
2020

21+
// Type for parsed query results.
// This is the shape the QuerySimilarity scorer asserts (via `as`) on the values
// it gets back from JSON.parse for both the task output and the expected value.
// NOTE(review): the assertion is compile-time only; nothing here validates the
// parsed JSON at runtime.
22+
interface ParsedQueryResult {
23+
// Outcome flag. The scorer compares this between output and expected first,
// and only compares the query text when both are true.
success: boolean;
24+
// Generated query text. Optional: the scorer falls back to "" when it is absent
// before normalizing it.
query?: string;
25+
// Not read by the scorer code visible here; presumably the failure message when
// `success` is false — TODO confirm against what the service returns.
error?: string;
26+
}
27+
2128
// Custom scorer that checks if the generated query is semantically similar
2229
// and also syntactically valid
2330
const QuerySimilarity = {
@@ -29,11 +36,15 @@ const QuerySimilarity = {
2936
}: {
3037
input: string;
3138
output: string;
32-
expected: string;
39+
expected?: string;
3340
}) => {
41+
if (!expected) {
42+
return 0;
43+
}
44+
3445
// Parse the output to extract the query
35-
const outputParsed = JSON.parse(output);
36-
const expectedParsed = JSON.parse(expected);
46+
const outputParsed = JSON.parse(output) as ParsedQueryResult;
47+
const expectedParsed = JSON.parse(expected) as ParsedQueryResult;
3748

3849
// Check success status first
3950
if (outputParsed.success !== expectedParsed.success) {
@@ -48,8 +59,8 @@ const QuerySimilarity = {
4859

4960
// If both succeeded, compare the queries
5061
if (outputParsed.success && expectedParsed.success) {
51-
const normalizedOutput = normalizeQuery(outputParsed.query);
52-
const normalizedExpected = normalizeQuery(expectedParsed.query);
62+
const normalizedOutput = normalizeQuery(outputParsed.query ?? "");
63+
const normalizedExpected = normalizeQuery(expectedParsed.query ?? "");
5364

5465
// Key patterns to check
5566
const patterns = [
@@ -78,7 +89,7 @@ const QuerySimilarity = {
7889
for (const pattern of patterns) {
7990
const outputMatch = pattern.test(normalizedOutput);
8091
const expectedMatch = pattern.test(normalizedExpected);
81-
92+
8293
if (expectedMatch) {
8394
totalPatterns++;
8495
if (outputMatch) {
@@ -91,12 +102,11 @@ const QuerySimilarity = {
91102
const patternScore = totalPatterns > 0 ? matchScore / totalPatterns : 0.5;
92103

93104
// Use Levenshtein for overall similarity
94-
const levenshtein = Levenshtein.scorer({
95-
input,
105+
const levenshteinResult = await Levenshtein({
96106
output: normalizedOutput,
97107
expected: normalizedExpected,
98108
});
99-
const levenshteinScore = typeof levenshtein === "number" ? levenshtein : 0;
109+
const levenshteinScore = levenshteinResult?.score ?? 0;
100110

101111
// Weighted combination
102112
return 0.6 * patternScore + 0.4 * levenshteinScore;
@@ -355,14 +365,10 @@ LIMIT 100`,
355365
];
356366
},
357367
task: async (input) => {
358-
const service = new AIQueryService(
359-
[runsSchema],
360-
traceAISDKModel(openai("gpt-4o-mini"))
361-
);
368+
const service = new AIQueryService([runsSchema], traceAISDKModel(openai("gpt-4o-mini")));
362369

363370
const result = await service.call(input);
364371
return JSON.stringify(result);
365372
},
366373
scorers: [QuerySimilarity, Levenshtein],
367374
});
368-

0 commit comments

Comments
 (0)