@@ -110,20 +110,18 @@ describe('Judge', () => {
110110 relevance : {
111111 score : 0.8 ,
112112 reasoning : 'The response is relevant to the question' ,
113- judgeConfigKey : 'test-judge' ,
114113 } ,
115114 accuracy : {
116115 score : 0.9 ,
117116 reasoning : 'The response is factually accurate' ,
118- judgeConfigKey : 'test-judge' ,
119117 } ,
120118 helpfulness : {
121119 score : 0.7 ,
122120 reasoning : 'The response provides helpful information' ,
123- judgeConfigKey : 'test-judge' ,
124121 } ,
125122 } ,
126123 success : true ,
124+ judgeConfigKey : 'test-judge' ,
127125 } ) ;
128126
129127 expect ( mockProvider . invokeStructuredModel ) . toHaveBeenCalledWith (
@@ -267,10 +265,11 @@ describe('Judge', () => {
267265 // When one metric is missing, it returns the partial evals it has with success: false
268266 expect ( result ) . toEqual ( {
269267 evals : {
270- relevance : { score : 0.8 , reasoning : 'Good' , judgeConfigKey : 'test-judge' } ,
271- helpfulness : { score : 0.7 , reasoning : 'Helpful' , judgeConfigKey : 'test-judge' } ,
268+ relevance : { score : 0.8 , reasoning : 'Good' } ,
269+ helpfulness : { score : 0.7 , reasoning : 'Helpful' } ,
272270 } ,
273271 success : false ,
272+ judgeConfigKey : 'test-judge' ,
274273 } ) ;
275274 } ) ;
276275
@@ -302,6 +301,7 @@ describe('Judge', () => {
302301 expect ( result ) . toEqual ( {
303302 evals : { } ,
304303 success : false ,
304+ judgeConfigKey : 'test-judge' ,
305305 } ) ;
306306 } ) ;
307307
@@ -315,6 +315,7 @@ describe('Judge', () => {
315315 evals : { } ,
316316 success : false ,
317317 error : 'Provider error' ,
318+ judgeConfigKey : 'test-judge' ,
318319 } ) ;
319320 expect ( mockLogger . error ) . toHaveBeenCalledWith ( 'Judge evaluation failed:' , error ) ;
320321 } ) ;
@@ -328,6 +329,7 @@ describe('Judge', () => {
328329 evals : { } ,
329330 success : false ,
330331 error : 'Unknown error' ,
332+ judgeConfigKey : 'test-judge' ,
331333 } ) ;
332334 } ) ;
333335 } ) ;
@@ -380,20 +382,18 @@ describe('Judge', () => {
380382 relevance : {
381383 score : 0.8 ,
382384 reasoning : 'The response is relevant to the question' ,
383- judgeConfigKey : 'test-judge' ,
384385 } ,
385386 accuracy : {
386387 score : 0.9 ,
387388 reasoning : 'The response is factually accurate' ,
388- judgeConfigKey : 'test-judge' ,
389389 } ,
390390 helpfulness : {
391391 score : 0.7 ,
392392 reasoning : 'The response provides helpful information' ,
393- judgeConfigKey : 'test-judge' ,
394393 } ,
395394 } ,
396395 success : true ,
396+ judgeConfigKey : 'test-judge' ,
397397 } ) ;
398398
399399 expect ( mockProvider . invokeStructuredModel ) . toHaveBeenCalledWith (
@@ -479,9 +479,9 @@ describe('Judge', () => {
479479 const result = parseResponse ( responseData ) ;
480480
481481 expect ( result ) . toEqual ( {
482- relevance : { score : 0.8 , reasoning : 'Good' , judgeConfigKey : 'test-judge' } ,
483- accuracy : { score : 0.9 , reasoning : 'Accurate' , judgeConfigKey : 'test-judge' } ,
484- helpfulness : { score : 0.7 , reasoning : 'Helpful' , judgeConfigKey : 'test-judge' } ,
482+ relevance : { score : 0.8 , reasoning : 'Good' } ,
483+ accuracy : { score : 0.9 , reasoning : 'Accurate' } ,
484+ helpfulness : { score : 0.7 , reasoning : 'Helpful' } ,
485485 } ) ;
486486 } ) ;
487487
@@ -514,7 +514,7 @@ describe('Judge', () => {
514514
515515 // Only helpfulness passes validation, relevance and accuracy are skipped
516516 expect ( result ) . toEqual ( {
517- helpfulness : { score : 0.7 , reasoning : 'Helpful' , judgeConfigKey : 'test-judge' } ,
517+ helpfulness : { score : 0.7 , reasoning : 'Helpful' } ,
518518 } ) ;
519519 } ) ;
520520 } ) ;
0 commit comments