Commit 5364628

_DSBEvaluations --> _SafetyEvaluation (Azure#39671)
* Adding asks arg to dsb evals
* adding unit tests and coherence evaluator
* adding evaluation_name param
* updates
* adding more unit tests
* updates
* make model config optional
* make evaluators empty by default
* add adversarial qa as default scenario
* add pyrit dependency and sample
* removing pyrit dependency for now
* updating conftest and test recordings
* ci please be happy with me
* update test recording
1 parent 74c2b2f commit 5364628

7 files changed (+675, -99 lines)

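The heart of the commit is the rename of the internal orchestration class. A minimal before/after import sketch, assuming the old class name from the commit title and taking both module paths from the file rename recorded below:

# Before this commit (old private module path, per the rename below):
# from azure.ai.evaluation._dsb_evaluation._dsb_evaluation import _DSBEvaluations
# After this commit (new private module path, as imported in the sample file added below):
from azure.ai.evaluation._safety_evaluation._safety_evaluation import _SafetyEvaluation, _SafetyEvaluator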

sdk/evaluation/azure-ai-evaluation/assets.json

Lines changed: 1 addition & 1 deletion
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/evaluation/azure-ai-evaluation",
-  "Tag": "python/evaluation/azure-ai-evaluation_995c25afac"
+  "Tag": "python/evaluation/azure-ai-evaluation_398d07a53e"
 }

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_dsb_evaluation/_dsb_evaluation.py renamed to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py

Lines changed: 205 additions & 96 deletions
Large diffs are not rendered by default.
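Since the renamed module's 205-addition diff is not rendered, here is a minimal sketch of the call pattern it supports, mirroring the single-turn content-safety sample in the file added below (argument names come from that sample and the commit message; nothing beyond them is implied):

import asyncio
import os

from azure.ai.evaluation._safety_evaluation._safety_evaluation import _SafetyEvaluation, _SafetyEvaluator
from azure.identity import DefaultAzureCredential

def test_target(query: str) -> str:
    # Stand-in for the application under evaluation: takes a query, returns a response.
    return "some response"

azure_ai_project = {
    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
    "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
    "project_name": os.environ.get("AZURE_PROJECT_NAME"),
}

safety_evaluation = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
results = asyncio.run(safety_evaluation(
    evaluators=[_SafetyEvaluator.CONTENT_SAFETY],  # evaluators default to empty per the commit message
    evaluation_name="some evaluation",             # parameter added in this commit
    target=test_target,
    num_turns=1,
    num_rows=3,
    output_path="results.jsonl",
))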
sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_safety_evaluation.py

Lines changed: 251 additions & 0 deletions
@@ -0,0 +1,251 @@
# coding: utf-8
# type: ignore

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""
DESCRIPTION:
    These samples demonstrate usage of _SafetyEvaluation class with various _SafetyEvaluator instances.

USAGE:
    python evaluation_samples_safety_evaluation.py

    Set the environment variables with your own values before running the sample:
    1) AZURE_OPENAI_ENDPOINT
    2) AZURE_OPENAI_API_VERSION
    3) AZURE_OPENAI_DEPLOYMENT
    4) AZURE_SUBSCRIPTION_ID
    5) AZURE_RESOURCE_GROUP_NAME
    6) AZURE_PROJECT_NAME

"""

class EvaluationSafetyEvaluationSamples(object):
    def evaluation_safety_evaluation_classes_methods(self):
        import os
        import asyncio
        from azure.ai.evaluation._safety_evaluation._safety_evaluation import _SafetyEvaluation, _SafetyEvaluator
        from azure.ai.evaluation.simulator import AdversarialScenario
        from azure.identity import DefaultAzureCredential
        # [START default_safety_evaluation]
        def test_target(query: str) -> str:
            return "some response"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_default = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)
        safety_evaluation_default_results = asyncio.run(safety_evaluation_default(
            target=test_target,
        ))
        # [END default_safety_evaluation]

        # [START content_safety_safety_evaluation]

        def test_target(query: str) -> str:
            return "some response"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_content_safety_single_turn = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)

        safety_evaluation_content_safety_single_turn_results = asyncio.run(safety_evaluation_content_safety_single_turn(
            evaluators=[_SafetyEvaluator.CONTENT_SAFETY],
            evaluation_name="some evaluation",
            target=test_target,
            num_turns=1,
            num_rows=3,
            output_path="evaluation_outputs_safety_single_turn.jsonl",
        ))
        # [END content_safety_safety_evaluation]

        # [START content_safety_multi_turn_safety_evaluation]
        def test_target(query: str) -> str:
            return "some response"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_content_safety_multi_turn = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)

        safety_evaluation_content_safety_multi_turn_results = asyncio.run(safety_evaluation_content_safety_multi_turn(
            evaluators=[_SafetyEvaluator.CONTENT_SAFETY],
            target=test_target,
            num_turns=3,
            num_rows=3,
            output_path="evaluation_outputs_safety_multi_turn.jsonl",
        ))

        # [END content_safety_multi_turn_safety_evaluation]

        # [START content_safety_scenario_safety_evaluation]

        def test_target(query: str) -> str:
            return "some response"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_content_safety_scenario = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)

        safety_evaluation_content_safety_scenario_results = asyncio.run(safety_evaluation_content_safety_scenario(
            evaluators=[_SafetyEvaluator.CONTENT_SAFETY],
            target=test_target,
            scenario=AdversarialScenario.ADVERSARIAL_SUMMARIZATION,
            num_rows=3,
            output_path="evaluation_outputs_safety_scenario.jsonl",
        ))

        # [END content_safety_scenario_safety_evaluation]

        # [START protected_material_safety_evaluation]
        def test_target(query: str) -> str:
            return "some response"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_protected_material = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)

        safety_evaluation_protected_material_results = asyncio.run(safety_evaluation_protected_material(
            evaluators=[_SafetyEvaluator.PROTECTED_MATERIAL],
            target=test_target,
            num_turns=1,
            num_rows=3,
            output_path="evaluation_outputs_protected_material.jsonl",
        ))

        # [END protected_material_safety_evaluation]

        # [START groundedness_safety_evaluation]
        def test_target(query: str) -> str:
            return "some response"

        grounding_data = "some grounding data"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_groundedness = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)
        safety_evaluation_groundedness_results = asyncio.run(safety_evaluation_groundedness(
            evaluators=[_SafetyEvaluator.GROUNDEDNESS],
            target=test_target,
            source_text=grounding_data,
            num_rows=3,
            output_path="evaluation_outputs_groundedness.jsonl",
        ))

        # [END groundedness_safety_evaluation]

        # [START quality_safety_evaluation]
        def test_target(query: str) -> str:
            return "some response"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_quality = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)

        safety_evaluation_quality_results = asyncio.run(safety_evaluation_quality(
            evaluators=[_SafetyEvaluator.RELEVANCE, _SafetyEvaluator.COHERENCE, _SafetyEvaluator.FLUENCY],
            target=test_target,
            num_turns=1,
            num_rows=3,
            output_path="evaluation_outputs_quality.jsonl",
        ))

        # [END quality_safety_evaluation]

        # [START xpia_safety_evaluation]

        def test_target(query: str) -> str:
            return "some response"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_xpia = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)

        safety_evaluation_xpia_results = asyncio.run(safety_evaluation_xpia(
            evaluators=[_SafetyEvaluator.INDIRECT_ATTACK],
            target=test_target,
            num_turns=1,
            num_rows=3,
            output_path="evaluation_outputs_xpia.jsonl",
        ))

        # [END xpia_safety_evaluation]

        # [START upia_safety_evaluation]
        def test_target(query: str) -> str:
            return "some response"

        azure_ai_project = {
            "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
            "resource_group_name": os.environ.get("AZURE_RESOURCE_GROUP_NAME"),
            "project_name": os.environ.get("AZURE_PROJECT_NAME"),
        }

        credential = DefaultAzureCredential()

        safety_evaluation_upia = _SafetyEvaluation(azure_ai_project=azure_ai_project, credential=credential)
        safety_evaluation_upia_results = asyncio.run(safety_evaluation_upia(
            evaluators=[_SafetyEvaluator.DIRECT_ATTACK],
            target=test_target,
            num_turns=1,
            num_rows=3,
            output_path="evaluation_outputs_upia.jsonl",
        ))
        # [END upia_safety_evaluation]

if __name__ == "__main__":
    print("Loading samples in evaluation_samples_safety_evaluation.py")
    sample = EvaluationSafetyEvaluationSamples()
    print("Samples loaded successfully!")
    print("Running samples in evaluation_samples_safety_evaluation.py")
    sample.evaluation_safety_evaluation_classes_methods()
    print("Samples ran successfully!")

sdk/evaluation/azure-ai-evaluation/setup.py

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@
     "azure-identity>=1.16.0",
     "azure-core>=1.30.2",
     "nltk>=3.9.1",
-    "azure-storage-blob>=12.10.0",
+    "azure-storage-blob>=12.10.0"
 ],
 project_urls={
     "Bug Reports": "https://github.com/Azure/azure-sdk-for-python/issues",

sdk/evaluation/azure-ai-evaluation/tests/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,9 @@ def evalutation_run_sanitizer() -> None:
     add_body_key_sanitizer(json_path="$..userTenantId", value=ZERO_GUID)
     add_body_key_sanitizer(json_path="$..upn", value="Sanitized")

-    # remove the stainless retry header since it is causing some unnecessary mismatches in recordings
+    # remove the stainless retry header and read timeout since it is causing some unnecessary mismatches in recordings
     add_batch_sanitizers({Sanitizer.REMOVE_HEADER: [{"headers": "x-stainless-retry-count"}]})
+    add_batch_sanitizers({Sanitizer.REMOVE_HEADER: [{"headers": "x-stainless-read-timeout"}]})

     azure_workspace_triad_sanitizer()
     azureopenai_connection_sanitizer()
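The two stainless headers are removed through separate add_batch_sanitizers calls. Since the value under Sanitizer.REMOVE_HEADER is already a list, they could plausibly be collapsed into one call — a sketch, assuming (not verified in this commit) that the list accepts multiple entries:

# Hypothetical consolidation of the two calls above into a single batch.
add_batch_sanitizers({
    Sanitizer.REMOVE_HEADER: [
        {"headers": "x-stainless-retry-count"},
        {"headers": "x-stainless-read-timeout"},
    ]
})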
