@@ -37,21 +37,49 @@ export async function test_chatbot_by_dataset(ds: any) {
3737 // Resource request
3838 if ( test_result . response != test_data . answer ) {
3939 console . log (
40- 'html resource request did not match. LLM response :' +
40+ 'html resource request did not match for prompt' +
41+ test_data . question +
42+ '. LLM response :' +
4143 test_result . response +
4244 ' Actual response: ' +
4345 test_data . answer
4446 )
4547 }
4648 } else if ( test_result . action == 'summary' ) {
47- validate_summary_output ( test_result . response , test_data . answer )
49+ const validated_llm_summary_output = validate_summary_output ( test_result . response , test_data . answer )
50+ if ( ! validated_llm_summary_output )
51+ console . log (
52+ 'Summary output did not match for prompt' +
53+ test_data . question +
54+ '. LLM response :' +
55+ test_result . response +
56+ ' Actual response: ' +
57+ test_data . answer
58+ )
4859 } else if ( test_result . action == 'dge' ) {
4960 if ( test_result . response != test_data . answer ) {
50- //console.log("DE request did not match. LLM response :" + JSON.stringify(test_result.response) + " Actual response: " + JSON.stringify(test_data.answer))
51- validate_DE_output ( test_result . response , test_data . answer )
61+ const validated_llm_DE_output = validate_DE_output ( test_result . response , test_data . answer )
62+ if ( ! validated_llm_DE_output )
63+ console . log (
64+ 'DE output did not match for prompt' +
65+ test_data . question +
66+ '. LLM response :' +
67+ test_result . response +
68+ ' Actual response: ' +
69+ test_data . answer
70+ )
5271 }
5372 } else if ( test_result . action == 'matrix' ) {
54- validate_matrix_output ( test_result . response , test_data . answer )
73+ const validated_llm_matrix_output = validate_matrix_output ( test_result . response , test_data . answer )
74+ if ( ! validated_llm_matrix_output )
75+ console . log (
76+ 'Matrix output did not match for prompt' +
77+ test_data . question +
78+ '. LLM response :' +
79+ test_result . response +
80+ ' Actual response: ' +
81+ test_data . answer
82+ )
5583 }
5684 }
5785}
0 commit comments