@@ -5,7 +5,8 @@ import { ExpectedToolCall, parameterMatchingAccuracyScorer, toolCallingAccuracyS
5
5
import { Agent , getVercelToolCallingAgent } from "./agent.js" ;
6
6
import { appendAccuracySnapshot } from "./accuracy-snapshot.js" ;
7
7
8
- interface AccuracyTestConfig {
8
+ export interface AccuracyTestConfig {
9
+ systemPrompt ?: string ;
9
10
prompt : string ;
10
11
expectedToolCalls : ExpectedToolCall [ ] ;
11
12
mockedTools : MockedTools ;
@@ -17,13 +18,7 @@ export function describeAccuracyTests(
17
18
accuracyTestConfigs : AccuracyTestConfig [ ]
18
19
) {
19
20
const accuracyDatetime = process . env . MDB_ACCURACY_DATETIME ;
20
- if ( ! accuracyDatetime ) {
21
- throw new Error ( "MDB_ACCURACY_DATETIME environment variable is not set" ) ;
22
- }
23
21
const accuracyCommit = process . env . MDB_ACCURACY_COMMIT ;
24
- if ( ! accuracyCommit ) {
25
- throw new Error ( "MDB_ACCURACY_COMMIT environment variable is not set" ) ;
26
- }
27
22
28
23
if ( ! models . length ) {
29
24
console . warn ( `No models available to test ${ suiteName } ` ) ;
@@ -53,25 +48,31 @@ export function describeAccuracyTests(
53
48
const toolCalls = testTools . getToolCalls ( ) ;
54
49
const toolCallingAccuracy = toolCallingAccuracyScorer ( testConfig . expectedToolCalls , toolCalls ) ;
55
50
const parameterMatchingAccuracy = parameterMatchingAccuracyScorer ( testConfig . expectedToolCalls , toolCalls ) ;
56
- await appendAccuracySnapshot ( {
57
- datetime : accuracyDatetime ,
58
- commit : accuracyCommit ,
59
- model : model . modelName ,
60
- suite : suiteName ,
61
- test : testConfig . prompt ,
62
- toolCallingAccuracy,
63
- parameterAccuracy : parameterMatchingAccuracy ,
64
- } ) ;
51
+ if ( accuracyDatetime && accuracyCommit ) {
52
+ await appendAccuracySnapshot ( {
53
+ datetime : accuracyDatetime ,
54
+ commit : accuracyCommit ,
55
+ model : model . modelName ,
56
+ suite : suiteName ,
57
+ test : testConfig . prompt ,
58
+ toolCallingAccuracy,
59
+ parameterAccuracy : parameterMatchingAccuracy ,
60
+ } ) ;
61
+ } else {
62
+ console . info (
63
+ `Skipping accuracy snapshot update for ${ model . modelName } - ${ suiteName } - ${ testConfig . prompt } `
64
+ ) ;
65
+ }
65
66
66
67
try {
67
68
expect ( toolCallingAccuracy ) . not . toEqual ( 0 ) ;
68
69
expect ( parameterMatchingAccuracy ) . toBeGreaterThanOrEqual ( 0.5 ) ;
69
70
} catch ( error ) {
70
71
console . warn ( `Accuracy test failed for ${ model . modelName } - ${ suiteName } - ${ testConfig . prompt } ` ) ;
71
- console . warn ( `Conversation` , JSON . stringify ( conversation , null , 2 ) ) ;
72
- console . warn ( `Tool calls` , JSON . stringify ( toolCalls , null , 2 ) ) ;
73
- console . warn ( `Tool calling accuracy` , toolCallingAccuracy ) ;
74
- console . warn ( `Parameter matching accuracy` , parameterMatchingAccuracy ) ;
72
+ console . debug ( `Conversation` , JSON . stringify ( conversation , null , 2 ) ) ;
73
+ console . debug ( `Tool calls` , JSON . stringify ( toolCalls , null , 2 ) ) ;
74
+ console . debug ( `Tool calling accuracy` , toolCallingAccuracy ) ;
75
+ console . debug ( `Parameter matching accuracy` , parameterMatchingAccuracy ) ;
75
76
throw error ;
76
77
}
77
78
} ) ;
0 commit comments