Skip to content

Commit ad87d41

Browse files
chore: handle test failures and appropriately mark them as failed in accuracyRunStatus
1 parent 1e143ca commit ad87d41

File tree

6 files changed

+34
-34
lines changed

6 files changed

+34
-34
lines changed

scripts/mark-accuracy-run-finished.ts

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,21 @@
11
import { getAccuracySnapshotStorage } from "../tests/accuracy/sdk/accuracy-snapshot-storage/get-snapshot-storage.js";
2+
import {
3+
AccuracyRunStatus,
4+
AccuracyRunStatuses,
5+
} from "../tests/accuracy/sdk/accuracy-snapshot-storage/snapshot-storage.js";
26

3-
console.time(`Marked accuracy run id - ${process.env.MDB_ACCURACY_RUN_ID} as finished in`);
7+
let status: AccuracyRunStatuses | undefined;
8+
if (process.env.MDB_ACCURACY_RUN_STATUS === "done") {
9+
status = AccuracyRunStatus.Done;
10+
} else if (process.env.MDB_ACCURACY_RUN_STATUS === "failed") {
11+
status = AccuracyRunStatus.Failed;
12+
} else {
13+
console.info(`Unknown status - ${process.env.MDB_ACCURACY_RUN_STATUS}, will not update accuracy run.`);
14+
process.exit(1);
15+
}
16+
17+
console.time(`Marked accuracy run id - ${process.env.MDB_ACCURACY_RUN_ID} as ${status} in`);
418
const storage = await getAccuracySnapshotStorage();
5-
await storage.accuracyRunFinished();
19+
await storage.updateAccuracyRunStatus(status);
620
await storage.close();
7-
console.timeEnd(`Marked accuracy run id - ${process.env.MDB_ACCURACY_RUN_ID} as finished in`);
21+
console.timeEnd(`Marked accuracy run id - ${process.env.MDB_ACCURACY_RUN_ID} as ${status} in`);

scripts/run-accuracy-tests.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern "
2222

2323
# Each test run submits an accuracy snapshot entry for each prompt with the
2424
# accuracyRunStatus: "in-progress". When all the tests are done and jest exits
25-
# with an exit code of 0, we can safely mark accuracy run as finished.
25+
# with an exit code of 0, we can safely mark the accuracy run as done; otherwise
26+
# we mark it as failed.
2627
if [ $? -eq 0 ]; then
27-
npx tsx scripts/mark-accuracy-run-finished.ts
28+
MDB_ACCURACY_RUN_STATUS="done" npx tsx scripts/mark-accuracy-run-finished.ts
29+
else
30+
MDB_ACCURACY_RUN_STATUS="failed" npx tsx scripts/mark-accuracy-run-finished.ts
2831
fi

tests/accuracy/collection-schema.test.ts

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,12 @@
11
import { describeAccuracyTests, describeSuite } from "./sdk/describe-accuracy-tests.js";
22
import { getAvailableModels } from "./sdk/models.js";
33
import { AccuracyTestConfig } from "./sdk/describe-accuracy-tests.js";
4-
import { collectionSchemaResponse } from "../../src/tools/mongodb/metadata/collectionSchema.js";
5-
import { getSimplifiedSchema } from "mongodb-schema";
64

75
function callsCollectionSchema(prompt: string): AccuracyTestConfig {
86
return {
97
injectConnectedAssumption: true,
108
prompt: prompt,
11-
mockedTools: {
12-
"collection-schema": async function collectionSchema() {
13-
return collectionSchemaResponse(
14-
"db1",
15-
"coll1",
16-
await getSimplifiedSchema([
17-
{
18-
name: "Sample name1",
19-
dob: "28.11.2001",
20-
location: "NY",
21-
},
22-
{
23-
name: "Sample name1",
24-
dob: "28.11.2001",
25-
location: "NY",
26-
title: "Dr.",
27-
},
28-
])
29-
);
30-
},
31-
},
9+
mockedTools: {},
3210
expectedToolCalls: [
3311
{
3412
toolName: "collection-schema",

tests/accuracy/sdk/accuracy-snapshot-storage/disk-snapshot-storage.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import fs from "fs/promises";
33
import { fileURLToPath } from "url";
44
import {
55
AccuracyRunStatus,
6+
AccuracyRunStatuses,
67
AccuracySnapshotEntry,
78
AccuracySnapshotEntrySchema,
89
AccuracySnapshotStorage,
@@ -57,13 +58,13 @@ export class DiskSnapshotStorage implements AccuracySnapshotStorage {
5758
return latestRunId ? snapshot.filter((entry) => entry.accuracyRunId === latestRunId) : [];
5859
}
5960

60-
async accuracyRunFinished(): Promise<void> {
61+
async updateAccuracyRunStatus(status: AccuracyRunStatuses) {
6162
const snapshot = await this.readSnapshot();
6263
const updatedSnapshot = snapshot.map((entry) => {
6364
if (entry.accuracyRunId === this.accuracyRunId) {
6465
return {
6566
...entry,
66-
accuracyRunStatus: AccuracyRunStatus.Done,
67+
accuracyRunStatus: status,
6768
};
6869
}
6970

tests/accuracy/sdk/accuracy-snapshot-storage/mdb-snapshot-storage.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { Collection, MongoClient } from "mongodb";
22
import {
33
AccuracyRunStatus,
4+
AccuracyRunStatuses,
45
AccuracySnapshotEntry,
56
AccuracySnapshotEntrySchema,
67
AccuracySnapshotStorage,
@@ -76,10 +77,10 @@ export class MongoDBSnapshotStorage implements AccuracySnapshotStorage {
7677
return AccuracySnapshotEntrySchema.array().parse(snapshotEntries);
7778
}
7879

79-
async accuracyRunFinished(): Promise<void> {
80+
async updateAccuracyRunStatus(status: AccuracyRunStatuses) {
8081
await this.snapshotCollection.updateMany(
8182
{ accuracyRunId: this.accuracyRunId },
82-
{ $set: { accuracyRunStatus: AccuracyRunStatus.Done } }
83+
{ $set: { accuracyRunStatus: status } }
8384
);
8485
}
8586

tests/accuracy/sdk/accuracy-snapshot-storage/snapshot-storage.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,17 @@ export type ActualToolCall = z.infer<typeof ActualToolCallSchema>;
1111

1212
export const AccuracyRunStatus = {
1313
Done: "done",
14+
Failed: "failed",
1415
InProgress: "in-progress",
1516
} as const;
1617

18+
export type AccuracyRunStatuses = (typeof AccuracyRunStatus)[keyof typeof AccuracyRunStatus];
19+
1720
export const AccuracySnapshotEntrySchema = z.object({
1821
// Git and meta information for snapshot entries
1922
accuracyRunId: z.string(),
2023
accuracyRunStatus: z
21-
.enum([AccuracyRunStatus.Done, AccuracyRunStatus.InProgress])
24+
.enum([AccuracyRunStatus.Done, AccuracyRunStatus.Failed, AccuracyRunStatus.InProgress])
2225
.default(AccuracyRunStatus.InProgress),
2326
createdOn: z.number(),
2427
commitSHA: z.string(),
@@ -67,7 +70,7 @@ export interface AccuracySnapshotStorage {
6770

6871
getLatestSnapshotsForCommit(commit: string): Promise<AccuracySnapshotEntry[]>;
6972

70-
accuracyRunFinished(): Promise<void>;
73+
updateAccuracyRunStatus(status: AccuracyRunStatuses): Promise<void>;
7174

7275
close(): Promise<void>;
7376
}

0 commit comments

Comments
 (0)