@@ -68,6 +68,10 @@ def questions_wrong_file():
 def questions_answers_file():
     return _get_file("questions_answers.jsonl")
 
+@pytest.fixture
+def questions_answers_basic_file():
+    return _get_file("questions_answers_basic.jsonl")
+
 
 def _target_fn(query):
     """An example target function."""
@@ -90,6 +94,15 @@ def _target_fn2(query):
     response["query"] = f"The query is as follows: {query}"
     return response
 
+def _new_answer_target():
+    return {"response": "new response"}
+
+def _question_override_target(query):
+    return {"query": "new query"}
+
+def _question_answer_override_target(query, response):
+    return {"query": "new query", "response": "new response"}
+
 
 @pytest.mark.usefixtures("mock_model_config")
 @pytest.mark.unittest
@@ -508,3 +521,103 @@ def test_general_aggregation(self):
         assert aggregation["thing.metric"] == 3
         assert aggregation["other_thing.other_meteric"] == -3
         assert aggregation["final_thing.final_metric"] == 0.4
+
+    @pytest.mark.parametrize("use_pf_client", [True, False])
+    def test_optional_inputs_with_data(self, questions_file, questions_answers_basic_file, use_pf_client):
+        from test_evaluators.test_inputs_evaluators import (
+            NonOptionalEval,
+            HalfOptionalEval,
+            OptionalEval,
+            NoInputEval
+        )
+
+        # All variants work with both keyworded inputs
+        results = evaluate(
+            data=questions_answers_basic_file,
+            evaluators={
+                "non": NonOptionalEval(),
+                "half": HalfOptionalEval(),
+                "opt": OptionalEval(),
+                "no": NoInputEval()
+            },
+            _use_pf_client=use_pf_client
+        )  # type: ignore
+
+        first_row = results["rows"][0]
+        assert first_row["outputs.non.non_score"] == 0
+        assert first_row["outputs.half.half_score"] == 1
+        assert first_row["outputs.opt.opt_score"] == 3
+        # CodeClient doesn't like no-input evals.
+        if use_pf_client:
+            assert first_row["outputs.no.no_score"] == 0
+
+        # Variant with no default inputs fails on single input
+        with pytest.raises(EvaluationException) as exc_info:
+            evaluate(
+                data=questions_file,
+                evaluators={
+                    "non": NonOptionalEval(),
+                },
+                _use_pf_client=use_pf_client
+            )  # type: ignore
+        assert exc_info._excinfo[1].__str__() == "Missing required inputs for evaluator non : ['response']."  # type: ignore
+
+        # Variants with default answer work when only question is inputted
+        only_question_results = evaluate(
+            data=questions_file,
+            evaluators={
+                "half": HalfOptionalEval(),
+                "opt": OptionalEval(),
+                "no": NoInputEval()
+            },
+            _use_pf_client=use_pf_client
+        )  # type: ignore
+
+        first_row_2 = only_question_results["rows"][0]
+        assert first_row_2["outputs.half.half_score"] == 0
+        assert first_row_2["outputs.opt.opt_score"] == 1
+        if use_pf_client:
+            assert first_row_2["outputs.no.no_score"] == 0
+
+    @pytest.mark.parametrize("use_pf_client", [True, False])
+    def test_optional_inputs_with_target(self, questions_file, questions_answers_basic_file, use_pf_client):
+        from test_evaluators.test_inputs_evaluators import EchoEval
+
+        # Check that target overrides default inputs
+        target_answer_results = evaluate(
+            data=questions_file,
+            target=_new_answer_target,
+            evaluators={
+                "echo": EchoEval()
+            },
+            _use_pf_client=use_pf_client
+        )  # type: ignore
+
+        assert target_answer_results['rows'][0]['outputs.echo.echo_query'] == 'How long is flight from Earth to LV-426?'
+        assert target_answer_results['rows'][0]['outputs.echo.echo_response'] == 'new response'
+
+        # Check that target output replaces inputs from data (i.e. if both data and target produce the
+        # same output, the target's value is what is sent to the evaluator).
+        question_override_results = evaluate(
+            data=questions_answers_basic_file,
+            target=_question_override_target,
+            evaluators={
+                "echo": EchoEval()
+            },
+            _use_pf_client=use_pf_client
+        )  # type: ignore
+
+        assert question_override_results['rows'][0]['outputs.echo.echo_query'] == "new query"
+        assert question_override_results['rows'][0]['outputs.echo.echo_response'] == 'There is nothing good there.'
+
+        # Check that target can replace default and data inputs at the same time.
+        double_override_results = evaluate(
+            data=questions_answers_basic_file,
+            target=_question_answer_override_target,
+            evaluators={
+                "echo": EchoEval()
+            },
+            _use_pf_client=use_pf_client
+        )  # type: ignore
+        assert double_override_results['rows'][0]['outputs.echo.echo_query'] == "new query"
+        assert double_override_results['rows'][0]['outputs.echo.echo_response'] == "new response"
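
Note: the evaluator classes imported from test_evaluators.test_inputs_evaluators are not included in this diff. A minimal sketch consistent with the assertions above might look like the following; the class bodies, default values, and score formulas here are assumptions inferred from the tests, not the actual fixture code.

# Hypothetical sketch of the input-signature evaluators exercised above; the real
# definitions live in test_evaluators/test_inputs_evaluators.py and are not shown here.
class NonOptionalEval:
    def __call__(self, *, query, response):
        # Both inputs are required; scores 0 when both are supplied.
        return {"non_score": 0}


class HalfOptionalEval:
    def __call__(self, *, query, response="default"):
        # Scores 1 when a real response is supplied, 0 when the default is used.
        return {"half_score": 0 if response == "default" else 1}


class OptionalEval:
    def __call__(self, *, query="default", response="default"):
        # 1 point for a real query, 2 for a real response (3 when both are present).
        return {"opt_score": (0 if query == "default" else 1) + (0 if response == "default" else 2)}


class NoInputEval:
    def __call__(self):
        # Takes no inputs at all.
        return {"no_score": 0}


class EchoEval:
    def __call__(self, *, query="default", response="default"):
        # Echoes back whatever inputs it receives.
        return {"echo_query": query, "echo_response": response}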
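
Similarly, the questions_answers_basic.jsonl data file is not shown in this diff. Based on the assertions above, it is assumed to contain at least one row pairing the LV-426 query with the response used in the override checks; a hypothetical first row, expressed as a Python dict, might be:

# Assumed shape of the first row of questions_answers_basic.jsonl (one JSON object
# per line in the real file); inferred from the assertions above, not taken from the file.
assumed_basic_row = {
    "query": "How long is flight from Earth to LV-426?",
    "response": "There is nothing good there.",
}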