# NOTE: imports reconstructed for this excerpt; the azure.ai.evaluation module
# paths for the internal names below are assumptions inferred from usage.
import pytest
from unittest.mock import AsyncMock, MagicMock, patch

from azure.ai.evaluation._exceptions import EvaluationException
from azure.ai.evaluation._safety_evaluation._safety_evaluation import _SafetyEvaluation, _SafetyEvaluator
from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
from azure.ai.evaluation.simulator._utils import JsonLineChatProtocol, JsonLineList
from azure.core.credentials import TokenCredential


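# A MagicMock standing in for an azure.core TokenCredential.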
@pytest.fixture
def mock_credential():
    return MagicMock(spec=TokenCredential)


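# Model config dict with the required Azure OpenAI keys (most entries elided in this excerpt).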
@pytest.fixture
def mock_model_config_dict_valid():
    return {
        # ... (other config keys elided in this excerpt) ...
        "type": "azure_openai",
    }


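# Model config dict missing required keys (only "type" present), used to trigger validation errors.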
@pytest.fixture
def mock_model_config_dict_invalid():
    return {
        "type": "azure_openai",
    }


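# Minimal target callable: returns a canned response string.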
@pytest.fixture
def mock_target():
    def mock_target_fn() -> str:
        return "mock response"

    return mock_target_fn


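# Target variant that also returns retrieval context alongside the response.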
@pytest.fixture
def mock_target_with_context():
    def mock_target_with_context_fn() -> tuple:
        return ("mock response", "mock context")

    return mock_target_with_context_fn


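# Canned evaluation result: one row of all-zero content-safety scores for both the jailbreak and regular runs.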
@pytest.fixture
def mock_eval_result_dict():
    jailbreak = {
        "rows": [
            {
                "outputs.content_safety.hate_unfairness_score": 0.0,
                "outputs.content_safety.self_harm_score": 0.0,
                "outputs.content_safety.sexual_score": 0.0,
                "outputs.content_safety.violence_score": 0.0,
            }
        ],
        "metrics": {},
        "studio_url": "some url",
    }
    regular = {
        "rows": [
            {
                "outputs.content_safety.hate_unfairness_score": 0.0,
                "outputs.content_safety.self_harm_score": 0.0,
                "outputs.content_safety.sexual_score": 0.0,
                "outputs.content_safety.violence_score": 0.0,
            }
        ],
        "metrics": {},
        "studio_url": "some url",
    }
    return {"jailbreak": jailbreak, "regular": regular}


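# _SafetyEvaluation wired with a valid model config and mocked credential.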
@pytest.fixture
def safety_eval(mock_model_config_dict_valid, mock_credential):
    return _SafetyEvaluation(
        # ... (azure_ai_project and credential arguments elided in this excerpt) ...
        model_config=mock_model_config_dict_valid,
    )


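# _SafetyEvaluation without a model config, for exercising model-config validation.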
@pytest.fixture
def safety_eval_no_model_config(mock_credential):
    return _SafetyEvaluation(
        azure_ai_project={"subscription_id": "mock-sub", "resource_group_name": "mock-rg", "project_name": "mock-proj"},
        credential=mock_credential,
    )


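# Tests covering _SafetyEvaluation's input validation, defect-rate calculation, and simulation wiring.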
@pytest.mark.usefixtures("mock_model_config")
@pytest.mark.unittest
class TestSafetyEvaluation:
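    # Constructing _SafetyEvaluation with a config missing required keys should raise.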
    def test_validate_model_config_missing_keys(self, mock_credential, mock_model_config_dict_invalid):
        with pytest.raises(EvaluationException) as exc_info:
            _SafetyEvaluation(
                # ... (azure_ai_project and credential arguments elided in this excerpt) ...
                model_config=mock_model_config_dict_invalid,
            )
        assert "missing required keys" in str(exc_info.value)

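    # An unrecognized evaluator entry should raise an EvaluationException.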
    def test_get_evaluators_invalid(self, safety_eval):
        with pytest.raises(EvaluationException) as exc_info:
            safety_eval._get_evaluators([None])  # type: ignore
    # ... (assertion and intervening tests elided, through the start of test_validate_inputs_groundedness_no_source) ...
            source_text=None,
        )
        assert "requires either source_text" in str(exc_info.value)

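    # Multi-turn simulation (num_turns=3) combined with an adversarial scenario should be rejected.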
    def test_validate_inputs_multi_turn_scenario(self, safety_eval, mock_target):
        with pytest.raises(EvaluationException) as exc_info:
            safety_eval._validate_inputs(
                target=mock_target,
                evaluators=[_SafetyEvaluator.CONTENT_SAFETY],
                scenario=AdversarialScenario.ADVERSARIAL_SUMMARIZATION,
                num_turns=3,
            )
    # ... (assertion elided, along with the start of test_validate_inputs_no_model_config) ...
                evaluators=[_SafetyEvaluator.COHERENCE],
            )
        assert "Model configuration is required" in str(exc_info.value)

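    # Adversarial scenarios require the content-safety evaluator to be selected.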
    def test_validate_inputs_scenario_not_content_safety(self, safety_eval, mock_target):
        with pytest.raises(EvaluationException) as exc_info:
            safety_eval._validate_inputs(
                # ... (target and evaluators arguments elided in this excerpt) ...
                scenario=AdversarialScenario.ADVERSARIAL_SUMMARIZATION,
            )
        assert "not supported without content safety evaluation" in str(exc_info.value)

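    # With all-zero severity scores, the computed violence defect rate should be exactly 0.0.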
    def test_calculate_defect_rate(self, safety_eval, mock_eval_result_dict):
        eval_result = safety_eval._calculate_defect_rate(mock_eval_result_dict)
        assert eval_result is not None
        assert isinstance(eval_result["metrics"]["content_safety.violence_defect_rate"], float)
        assert eval_result["metrics"]["content_safety.violence_defect_rate"] == 0.0

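    # Without a scenario, _simulate drives the plain Simulator; results come back keyed as "regular".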
    @pytest.mark.asyncio
    @patch("azure.ai.evaluation.simulator._simulator.Simulator.__call__", new_callable=AsyncMock)
    async def test_simulate_no_scenario(self, mock__call__, safety_eval, mock_target):
        mock__call__.return_value = [JsonLineChatProtocol({"messages": []})]
        results = await safety_eval._simulate(target=mock_target)
        assert isinstance(results, dict)
        assert isinstance(results["regular"], str)

    @pytest.mark.asyncio
    @patch("azure.ai.evaluation.simulator.DirectAttackSimulator.__init__", return_value=None)
    @patch("azure.ai.evaluation.simulator.DirectAttackSimulator.__call__", new_callable=AsyncMock)
    async def test_simulate_direct_attack(self, mock_call, mock_init, safety_eval, mock_target):
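        # Direct attack yields paired jailbreak/regular outputs; _simulate should return both as strings.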
        mock_call.return_value = {
            "jailbreak": JsonLineList([{"messages": []}]),
            "regular": JsonLineList([{"messages": []}]),
        }

        results = await safety_eval._simulate(
            target=mock_target, direct_attack=True, adversarial_scenario=AdversarialScenario.ADVERSARIAL_QA
        )
        assert isinstance(results, dict)
        assert isinstance(results["regular"], str)
        assert isinstance(results["jailbreak"], str)

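    # Indirect jailbreak scenario: a single simulated dataset, returned under "regular".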
    @pytest.mark.asyncio
    @patch("azure.ai.evaluation.simulator.IndirectAttackSimulator.__init__", return_value=None)
    @patch("azure.ai.evaluation.simulator.IndirectAttackSimulator.__call__", new_callable=AsyncMock)
    async def test_simulate_indirect_jailbreak(self, mock_call, mock_init, safety_eval, mock_target):
        mock_call.return_value = JsonLineList([{"messages": []}])

        results = await safety_eval._simulate(
            target=mock_target, adversarial_scenario=AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK
        )
        assert isinstance(results, dict)
        assert isinstance(results["regular"], str)

    @pytest.mark.asyncio
    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__init__", return_value=None)
    @patch("azure.ai.evaluation.simulator.AdversarialSimulator.__call__", new_callable=AsyncMock)
    async def test_simulate_adversarial(self, mock_call, mock_init, safety_eval, mock_target):
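        # Plain adversarial QA scenario: one dataset, returned under "regular".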
        mock_call.return_value = JsonLineList([{"messages": []}])
        results = await safety_eval._simulate(
            target=mock_target, adversarial_scenario=AdversarialScenario.ADVERSARIAL_QA
        )
        assert isinstance(results, dict)
        assert isinstance(results["regular"], str)

    @pytest.mark.asyncio
    # ... (simulator __init__/__call__ patch decorators elided in this excerpt) ...
    async def test_simulate_no_results(self, mock_call, mock_init, safety_eval, mock_target):
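        # If the simulator returns nothing, _simulate should raise instead of writing empty results.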
        mock_call.return_value = None
        with pytest.raises(EvaluationException) as exc_info:
            results = await safety_eval._simulate(
                target=mock_target, adversarial_scenario=AdversarialScenario.ADVERSARIAL_QA
            )
        assert "outputs generated by the simulator" in str(exc_info.value)