Skip to content

Commit 4a4754d

Browse files
chore: adds tests for the rest of the tools
1 parent 63224d3 commit 4a4754d

13 files changed

+558
-8
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import { describeAccuracyTests, describeSuite } from "./sdk/describe-accuracy-tests.js";
2+
import { getAvailableModels } from "./sdk/models.js";
3+
import { AccuracyTestConfig } from "./sdk/describe-accuracy-tests.js";
4+
import { ExpectedToolCall } from "./sdk/accuracy-scorers.js";
5+
6+
/**
 * Builds an accuracy test config for a prompt together with the tool calls
 * the caller expects for it.
 *
 * The config always sets `injectConnectedAssumption` to `true` and supplies
 * an empty `mockedTools` map.
 *
 * @param prompt - The prompt text stored on the config.
 * @param expectedToolCalls - The expected tool calls, stored on the config as given.
 * @returns The assembled test config.
 */
function callsCollectionStorageSize(prompt: string, expectedToolCalls: ExpectedToolCall[]): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls,
    };
}
14+
15+
// Suites for the 'collection-storage-size' tool, handed to describeAccuracyTests
// together with the models returned by getAvailableModels().
describeAccuracyTests(getAvailableModels(), {
    ...describeSuite("should only call 'collection-storage-size' tool", [
        callsCollectionStorageSize("What is the size of 'mflix.movies' namespace", [
            {
                toolName: "collection-storage-size",
                parameters: {
                    database: "mflix",
                    collection: "movies",
                },
            },
        ]),
    ]),
    ...describeSuite("should call 'collection-storage-size' tool after another tool/s", [
        callsCollectionStorageSize("How much size is each collection in comics database", [
            {
                toolName: "list-collections",
                parameters: {
                    database: "comics",
                },
            },
            {
                toolName: "collection-storage-size",
                parameters: {
                    database: "comics",
                    collection: "books",
                },
            },
            {
                toolName: "collection-storage-size",
                parameters: {
                    database: "comics",
                    collection: "characters",
                },
            },
        ]),
    ]),
});

tests/accuracy/count.test.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import { describeAccuracyTests, describeSuite } from "./sdk/describe-accuracy-tests.js";
2+
import { getAvailableModels } from "./sdk/models.js";
3+
import { AccuracyTestConfig } from "./sdk/describe-accuracy-tests.js";
4+
5+
/**
 * Builds an accuracy test config that expects a single `count` tool call
 * carrying only `database` and `collection` parameters (no `query` key).
 *
 * @param prompt - The prompt text stored on the config.
 * @param database - Expected `database` parameter; defaults to `"mflix"`.
 * @param collection - Expected `collection` parameter; defaults to `"movies"`.
 * @returns The assembled test config.
 */
function callsCountToolWithEmptyQuery(prompt: string, database = "mflix", collection = "movies"): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls: [
            {
                toolName: "count",
                parameters: {
                    database,
                    collection,
                },
            },
        ],
    };
}
21+
22+
/**
 * Builds an accuracy test config that expects a single `count` tool call
 * carrying `database`, `collection` and `query` parameters.
 *
 * @param prompt - The prompt text stored on the config.
 * @param database - Expected `database` parameter; defaults to `"mflix"`.
 * @param collection - Expected `collection` parameter; defaults to `"movies"`.
 * @param query - Expected `query` parameter; defaults to an empty object.
 * @returns The assembled test config.
 */
function callsCountToolWithQuery(
    prompt: string,
    database = "mflix",
    collection = "movies",
    query: Record<string, unknown> = {}
): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls: [
            {
                toolName: "count",
                parameters: {
                    database,
                    collection,
                    query,
                },
            },
        ],
    };
}
44+
45+
// Suites for the 'count' tool, handed to describeAccuracyTests together with
// the models returned by getAvailableModels().
describeAccuracyTests(getAvailableModels(), {
    ...describeSuite("should only call 'count' tool", [
        callsCountToolWithEmptyQuery("Count number of documents in 'mflix.movies' namespace."),
        callsCountToolWithEmptyQuery(
            "How many documents are there in 'characters' collection in 'comics' database?",
            "comics",
            "characters"
        ),
        callsCountToolWithQuery(
            "Count all the documents in 'mflix.movies' namespace with runtime less than 100?",
            "mflix",
            "movies",
            { runtime: { $lt: 100 } }
        ),
    ]),
});
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import { describeAccuracyTests, describeSuite } from "./sdk/describe-accuracy-tests.js";
2+
import { getAvailableModels } from "./sdk/models.js";
3+
import { AccuracyTestConfig } from "./sdk/describe-accuracy-tests.js";
4+
import { ExpectedToolCall } from "./sdk/accuracy-scorers.js";
5+
6+
/**
 * Builds an accuracy test config that expects a single `create-collection`
 * tool call for the given database and collection.
 *
 * @param prompt - The prompt text stored on the config.
 * @param database - Expected `database` parameter of the tool call.
 * @param collection - Expected `collection` parameter of the tool call.
 * @returns The assembled test config.
 */
function callsCreateCollection(prompt: string, database: string, collection: string): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls: [
            {
                toolName: "create-collection",
                parameters: {
                    database,
                    collection,
                },
            },
        ],
    };
}
22+
23+
/**
 * Builds an accuracy test config for a prompt together with an explicit list
 * of expected tool calls.
 *
 * The return type is annotated so the literal is checked against
 * `AccuracyTestConfig`, matching the other helpers in this file.
 *
 * @param prompt - The prompt text stored on the config.
 * @param expectedToolCalls - The expected tool calls, stored on the config as given.
 * @returns The assembled test config.
 */
function callsCreateCollectionWithListCollections(
    prompt: string,
    expectedToolCalls: ExpectedToolCall[]
): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls,
    };
}
31+
32+
// Suites for the 'create-collection' tool, handed to describeAccuracyTests
// together with the models returned by getAvailableModels().
describeAccuracyTests(getAvailableModels(), {
    ...describeSuite("should only call 'create-collection' tool", [
        callsCreateCollection("Create a new namespace 'mflix.documentaries'", "mflix", "documentaries"),
        callsCreateCollection("Create a new collection villains in comics database", "comics", "villains"),
    ]),
    ...describeSuite("should call 'create-collection' alongside other required tools", [
        callsCreateCollectionWithListCollections(
            "If and only if, the namespace 'mflix.documentaries' does not exist, then create it",
            [
                {
                    toolName: "list-collections",
                    parameters: {
                        database: "mflix",
                    },
                },
                {
                    toolName: "create-collection",
                    parameters: {
                        database: "mflix",
                        collection: "documentaries",
                    },
                },
            ]
        ),
    ]),
});

tests/accuracy/db-stats.test.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import { describeAccuracyTests, describeSuite } from "./sdk/describe-accuracy-tests.js";
2+
import { getAvailableModels } from "./sdk/models.js";
3+
import { AccuracyTestConfig } from "./sdk/describe-accuracy-tests.js";
4+
5+
/**
 * Builds an accuracy test config that expects a single `db-stats` tool call
 * for the given database.
 *
 * NOTE(review): the function name mentions "list databases", but the expected
 * tool call is `db-stats`; the name looks like a copy-paste leftover. It is
 * kept unchanged here so existing callers keep working — consider renaming.
 *
 * @param prompt - The prompt text stored on the config.
 * @param database - Expected `database` parameter; defaults to `"mflix"`.
 * @returns The assembled test config.
 */
function callsListDatabases(prompt: string, database = "mflix"): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls: [
            {
                toolName: "db-stats",
                parameters: {
                    database,
                },
            },
        ],
    };
}
20+
21+
// Suite for the 'db-stats' tool, handed to describeAccuracyTests together with
// the models returned by getAvailableModels().
describeAccuracyTests(getAvailableModels(), {
    ...describeSuite("should only call 'db-stats' tool", [
        callsListDatabases("What is the size occupied by database mflix?"),
    ]),
});
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { describeAccuracyTests, describeSuite } from "./sdk/describe-accuracy-tests.js";
2+
import { getAvailableModels } from "./sdk/models.js";
3+
import { AccuracyTestConfig } from "./sdk/describe-accuracy-tests.js";
4+
import { ExpectedToolCall } from "./sdk/accuracy-scorers.js";
5+
6+
/**
 * Builds an accuracy test config that expects a single `drop-collection`
 * tool call targeting database `"mflix"` and collection `"movies"`.
 *
 * @param prompt - The prompt text stored on the config.
 * @returns The assembled test config.
 */
function onlyCallsDropCollection(prompt: string): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls: [
            {
                toolName: "drop-collection",
                parameters: {
                    database: "mflix",
                    collection: "movies",
                },
            },
        ],
    };
}
22+
23+
/**
 * Builds an accuracy test config for a prompt together with an explicit list
 * of expected tool calls.
 *
 * @param prompt - The prompt text stored on the config.
 * @param expectedToolCalls - The expected tool calls, stored on the config as given.
 * @returns The assembled test config.
 */
function callsDropCollection(prompt: string, expectedToolCalls: ExpectedToolCall[]): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls,
    };
}
31+
32+
// Suites for the 'drop-collection' tool, handed to describeAccuracyTests
// together with the models returned by getAvailableModels().
describeAccuracyTests(getAvailableModels(), {
    ...describeSuite("should only call 'drop-collection' tool", [
        onlyCallsDropCollection("Remove mflix.movies namespace from my cluster."),
        onlyCallsDropCollection("Drop movies collection from mflix database."),
    ]),
    ...describeSuite("should call 'drop-collection' after calling other necessary tools", [
        callsDropCollection("Remove books collection from which ever database contains it.", [
            {
                toolName: "list-databases",
                parameters: {},
            },
            {
                toolName: "list-collections",
                parameters: {
                    database: "admin",
                },
            },
            {
                toolName: "list-collections",
                parameters: {
                    database: "comics",
                },
            },
            {
                toolName: "list-collections",
                parameters: {
                    database: "config",
                },
            },
            {
                toolName: "list-collections",
                parameters: {
                    database: "local",
                },
            },
            {
                toolName: "list-collections",
                parameters: {
                    database: "mflix",
                },
            },
            {
                toolName: "drop-collection",
                parameters: {
                    database: "comics",
                    collection: "books",
                },
            },
        ]),
    ]),
});

tests/accuracy/drop-database.test.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import { describeAccuracyTests, describeSuite } from "./sdk/describe-accuracy-tests.js";
2+
import { getAvailableModels } from "./sdk/models.js";
3+
import { AccuracyTestConfig } from "./sdk/describe-accuracy-tests.js";
4+
import { ExpectedToolCall } from "./sdk/accuracy-scorers.js";
5+
6+
/**
 * Builds an accuracy test config that expects a single `drop-database` tool
 * call targeting database `"mflix"`.
 *
 * @param prompt - The prompt text stored on the config.
 * @returns The assembled test config.
 */
function onlyCallsDropDatabase(prompt: string): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls: [
            {
                toolName: "drop-database",
                parameters: {
                    database: "mflix",
                },
            },
        ],
    };
}
21+
22+
/**
 * Builds an accuracy test config for a prompt together with an explicit list
 * of expected tool calls.
 *
 * @param prompt - The prompt text stored on the config.
 * @param expectedToolCalls - The expected tool calls, stored on the config as given.
 * @returns The assembled test config.
 */
function callsDropDatabase(prompt: string, expectedToolCalls: ExpectedToolCall[]): AccuracyTestConfig {
    return {
        injectConnectedAssumption: true,
        prompt,
        mockedTools: {},
        expectedToolCalls,
    };
}
30+
31+
// Suites for the 'drop-database' tool, handed to describeAccuracyTests
// together with the models returned by getAvailableModels().
describeAccuracyTests(getAvailableModels(), {
    ...describeSuite("should only call 'drop-database' tool", [
        onlyCallsDropDatabase("Remove mflix database from my cluster."),
        onlyCallsDropDatabase("Drop database named mflix."),
    ]),
    ...describeSuite("should call 'drop-database' after calling other necessary tools", [
        callsDropDatabase("If there is a mflix database in my cluster then drop it.", [
            {
                toolName: "list-databases",
                parameters: {},
            },
            {
                toolName: "drop-database",
                parameters: {
                    database: "mflix",
                },
            },
        ]),
    ]),
});

0 commit comments

Comments
 (0)