Skip to content

Commit be5ff0b

Browse files
authored
[redteam] agent risk categories and refactor XPIA into attack strategy (#43054)
* Refactor XPIA into AttackStrategy * RedTeam context handling improvements * fix formatting for xpia prompts * updates * updates * updates working multiple contexts * context without tool_name / context_type * updates * risk subtypes * sync step 1 * risk subtypes pt 2 * updates * undo sync updates until updated typespec * add new risk categories * custom attack objectives behavioral change * fix target for attack objectives * update client * formatting * ensure consistency between orchestrators behavior * old flow working * create sync eval working * eval result being parsed properly!! * temp commit to call vienna endpoint for sync evals * sdl pipeline working local * fix num objectives for xpia * taxonomy support * remove local rai service config logic * formatting * undo int url hardcoding * copilot comment * revert change to sample * updated projects api version to 11-15 * spell check errors * make xpia easy * fix import * fix all imports of projectsclient * last import fix I hope * update pandas for 3.14 * start fix tests, remove experimental tags from safety eval, undo setup changes * run formatter * app insights changes commented out * formatting * updates to client * fix evaluator name * fix unit tests * fix some e2e tests * fix formatting * fix some e2e tests * update recordings again * add back experimental tags for content safety evaluators for now
1 parent 3d25433 commit be5ff0b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+14999
-5246
lines changed

sdk/evaluation/azure-ai-evaluation/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/evaluation/azure-ai-evaluation",
5-
"Tag": "python/evaluation/azure-ai-evaluation_d7b00f22b8"
5+
"Tag": "python/evaluation/azure-ai-evaluation_b613e35220"
66
}

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@
66
# that would have otherwise been a relative import scoped to single evaluator directories.
77

88
from . import constants
9-
from .rai_service import evaluate_with_rai_service
9+
from .rai_service import evaluate_with_rai_service, evaluate_with_rai_service_sync
1010
from .utils import get_harm_severity_level
1111
from .evaluation_onedp_client import EvaluationServiceOneDPClient
1212
from .onedp.models import EvaluationUpload, EvaluationResult, RedTeamUpload, ResultType
1313

1414
__all__ = [
1515
"get_harm_severity_level",
1616
"evaluate_with_rai_service",
17+
"evaluate_with_rai_service_sync",
1718
"constants",
1819
"EvaluationServiceOneDPClient",
1920
"EvaluationResult",

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ class Tasks:
5050
GROUNDEDNESS = "groundedness"
5151
CODE_VULNERABILITY = "code vulnerability"
5252
UNGROUNDED_ATTRIBUTES = "inference sensitive attributes"
53+
SENSITIVE_DATA_LEAKAGE = "sensitive_data_leakage"
54+
TASK_ADHERENCE = "task_adherence"
55+
PROHIBITED_ACTIONS = "prohibited_actions"
5356

5457

5558
class _InternalAnnotationTasks:
@@ -74,6 +77,9 @@ class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
7477
GROUNDEDNESS = "generic_groundedness"
7578
CODE_VULNERABILITY = "code_vulnerability"
7679
UNGROUNDED_ATTRIBUTES = "ungrounded_attributes"
80+
SENSITIVE_DATA_LEAKAGE = "sensitive_data_leakage"
81+
TASK_ADHERENCE = "task_adherence"
82+
PROHIBITED_ACTIONS = "prohibited_actions"
7783

7884

7985
class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/evaluation_onedp_client.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import logging
66
from typing import Union, Any, Dict
77
from azure.core.credentials import AzureKeyCredential, TokenCredential
8-
from azure.ai.evaluation._common.onedp import AIProjectClient as RestEvaluationServiceClient
8+
from azure.ai.evaluation._common.onedp import ProjectsClient as RestEvaluationServiceClient
99
from azure.ai.evaluation._common.onedp.models import (
1010
PendingUploadRequest,
1111
PendingUploadType,
@@ -71,7 +71,7 @@ def create_evaluation_result(
7171
)
7272
start_pending_upload_response = self.rest_client.evaluation_results.start_pending_upload(
7373
name=name,
74-
version=version,
74+
version=str(version),
7575
body=PendingUploadRequest(pending_upload_type=PendingUploadType.TEMPORARY_BLOB_REFERENCE),
7676
**kwargs,
7777
)
@@ -84,15 +84,15 @@ def create_evaluation_result(
8484

8585
LOGGER.debug(f"Creating evaluation result version for {name} with version {version}")
8686
create_version_response = self.rest_client.evaluation_results.create_or_update_version(
87-
body=EvaluationResult(
87+
evaluation_result=EvaluationResult(
8888
blob_uri=start_pending_upload_response.blob_reference_for_consumption.blob_uri,
8989
result_type=result_type,
9090
name=name,
91-
version=version,
91+
version=str(version),
9292
metrics=metrics,
9393
),
9494
name=name,
95-
version=version,
95+
version=str(version),
9696
**kwargs,
9797
)
9898

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/onedp/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
if TYPE_CHECKING:
1313
from ._patch import * # pylint: disable=unused-wildcard-import
1414

15-
from ._client import AIProjectClient # type: ignore
15+
from ._client import ProjectsClient # type: ignore
1616
from ._version import VERSION
1717

1818
__version__ = VERSION
@@ -25,7 +25,7 @@
2525
from ._patch import patch_sdk as _patch_sdk
2626

2727
__all__ = [
28-
"AIProjectClient",
28+
"ProjectsClient",
2929
]
3030
__all__.extend([p for p in _patch_all if p not in __all__]) # pyright: ignore
3131

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/onedp/_client.py

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,58 +14,78 @@
1414
from azure.core.pipeline import policies
1515
from azure.core.rest import HttpRequest, HttpResponse
1616

17-
from ._configuration import AIProjectClientConfiguration
17+
from ._configuration import ProjectsClientConfiguration
1818
from ._utils.serialization import Deserializer, Serializer
1919
from .operations import (
2020
ConnectionsOperations,
2121
DatasetsOperations,
2222
DeploymentsOperations,
2323
EvaluationResultsOperations,
24+
EvaluationRulesOperations,
25+
EvaluationTaxonomiesOperations,
2426
EvaluationsOperations,
27+
EvaluatorsOperations,
2528
IndexesOperations,
29+
InsightsOperations,
2630
RedTeamsOperations,
31+
SchedulesOperations,
32+
SyncEvalsOperations,
2733
)
2834

2935
if TYPE_CHECKING:
3036
from azure.core.credentials import TokenCredential
3137

3238

33-
class AIProjectClient: # pylint: disable=too-many-instance-attributes
34-
"""AIProjectClient.
39+
class ProjectsClient: # pylint: disable=too-many-instance-attributes
40+
"""ProjectsClient.
3541
3642
:ivar connections: ConnectionsOperations operations
37-
:vartype connections: azure.ai.projects.onedp.operations.ConnectionsOperations
43+
:vartype connections: azure.ai.projects.operations.ConnectionsOperations
44+
:ivar sync_evals: SyncEvalsOperations operations
45+
:vartype sync_evals: azure.ai.projects.operations.SyncEvalsOperations
3846
:ivar evaluations: EvaluationsOperations operations
39-
:vartype evaluations: azure.ai.projects.onedp.operations.EvaluationsOperations
47+
:vartype evaluations: azure.ai.projects.operations.EvaluationsOperations
48+
:ivar evaluators: EvaluatorsOperations operations
49+
:vartype evaluators: azure.ai.projects.operations.EvaluatorsOperations
4050
:ivar datasets: DatasetsOperations operations
41-
:vartype datasets: azure.ai.projects.onedp.operations.DatasetsOperations
51+
:vartype datasets: azure.ai.projects.operations.DatasetsOperations
4252
:ivar indexes: IndexesOperations operations
43-
:vartype indexes: azure.ai.projects.onedp.operations.IndexesOperations
53+
:vartype indexes: azure.ai.projects.operations.IndexesOperations
54+
:ivar insights: InsightsOperations operations
55+
:vartype insights: azure.ai.projects.operations.InsightsOperations
4456
:ivar deployments: DeploymentsOperations operations
45-
:vartype deployments: azure.ai.projects.onedp.operations.DeploymentsOperations
57+
:vartype deployments: azure.ai.projects.operations.DeploymentsOperations
4658
:ivar red_teams: RedTeamsOperations operations
47-
:vartype red_teams: azure.ai.projects.onedp.operations.RedTeamsOperations
59+
:vartype red_teams: azure.ai.projects.operations.RedTeamsOperations
60+
:ivar evaluation_taxonomies: EvaluationTaxonomiesOperations operations
61+
:vartype evaluation_taxonomies: azure.ai.projects.operations.EvaluationTaxonomiesOperations
62+
:ivar schedules: SchedulesOperations operations
63+
:vartype schedules: azure.ai.projects.operations.SchedulesOperations
4864
:ivar evaluation_results: EvaluationResultsOperations operations
49-
:vartype evaluation_results: azure.ai.projects.onedp.operations.EvaluationResultsOperations
65+
:vartype evaluation_results: azure.ai.projects.operations.EvaluationResultsOperations
66+
:ivar evaluation_rules: EvaluationRulesOperations operations
67+
:vartype evaluation_rules: azure.ai.projects.operations.EvaluationRulesOperations
5068
:param endpoint: Project endpoint. In the form
51-
"https://<your-ai-services-account-name>.services.ai.azure.com/api/projects/_project"
69+
"`https://your-ai-services-account-name.services.ai.azure.com/api/projects/_project
70+
<https://your-ai-services-account-name.services.ai.azure.com/api/projects/_project>`_"
5271
if your Foundry Hub has only one Project, or to use the default Project in your Hub. Or in the
5372
form
54-
"https://<your-ai-services-account-name>.services.ai.azure.com/api/projects/<your-project-name>"
73+
"`https://your-ai-services-account-name.services.ai.azure.com/api/projects/your-project-name
74+
<https://your-ai-services-account-name.services.ai.azure.com/api/projects/your-project-name>`_"
5575
if you want to explicitly
5676
specify the Foundry Project name. Required.
5777
:type endpoint: str
5878
:param credential: Credential used to authenticate requests to the service. Required.
5979
:type credential: ~azure.core.credentials.TokenCredential
6080
:keyword api_version: The API version to use for this operation. Default value is
61-
"2025-05-15-preview". Note that overriding this default value may result in unsupported
81+
"2025-11-15-preview". Note that overriding this default value may result in unsupported
6282
behavior.
6383
:paramtype api_version: str
6484
"""
6585

6686
def __init__(self, endpoint: str, credential: "TokenCredential", **kwargs: Any) -> None:
6787
_endpoint = "{endpoint}"
68-
self._config = AIProjectClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
88+
self._config = ProjectsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
6989

7090
_policies = kwargs.pop("policies", None)
7191
if _policies is None:
@@ -90,14 +110,24 @@ def __init__(self, endpoint: str, credential: "TokenCredential", **kwargs: Any)
90110
self._deserialize = Deserializer()
91111
self._serialize.client_side_validation = False
92112
self.connections = ConnectionsOperations(self._client, self._config, self._serialize, self._deserialize)
113+
self.sync_evals = SyncEvalsOperations(self._client, self._config, self._serialize, self._deserialize)
93114
self.evaluations = EvaluationsOperations(self._client, self._config, self._serialize, self._deserialize)
115+
self.evaluators = EvaluatorsOperations(self._client, self._config, self._serialize, self._deserialize)
94116
self.datasets = DatasetsOperations(self._client, self._config, self._serialize, self._deserialize)
95117
self.indexes = IndexesOperations(self._client, self._config, self._serialize, self._deserialize)
118+
self.insights = InsightsOperations(self._client, self._config, self._serialize, self._deserialize)
96119
self.deployments = DeploymentsOperations(self._client, self._config, self._serialize, self._deserialize)
97120
self.red_teams = RedTeamsOperations(self._client, self._config, self._serialize, self._deserialize)
121+
self.evaluation_taxonomies = EvaluationTaxonomiesOperations(
122+
self._client, self._config, self._serialize, self._deserialize
123+
)
124+
self.schedules = SchedulesOperations(self._client, self._config, self._serialize, self._deserialize)
98125
self.evaluation_results = EvaluationResultsOperations(
99126
self._client, self._config, self._serialize, self._deserialize
100127
)
128+
self.evaluation_rules = EvaluationRulesOperations(
129+
self._client, self._config, self._serialize, self._deserialize
130+
)
101131

102132
def send_request(self, request: HttpRequest, *, stream: bool = False, **kwargs: Any) -> HttpResponse:
103133
"""Runs the network request through the client's chained policies.

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/onedp/_configuration.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,30 +16,32 @@
1616
from azure.core.credentials import TokenCredential
1717

1818

19-
class AIProjectClientConfiguration: # pylint: disable=too-many-instance-attributes
20-
"""Configuration for AIProjectClient.
19+
class ProjectsClientConfiguration: # pylint: disable=too-many-instance-attributes
20+
"""Configuration for ProjectsClient.
2121
2222
Note that all parameters used to create this instance are saved as instance
2323
attributes.
2424
2525
:param endpoint: Project endpoint. In the form
26-
"https://<your-ai-services-account-name>.services.ai.azure.com/api/projects/_project"
26+
"`https://your-ai-services-account-name.services.ai.azure.com/api/projects/_project
27+
<https://your-ai-services-account-name.services.ai.azure.com/api/projects/_project>`_"
2728
if your Foundry Hub has only one Project, or to use the default Project in your Hub. Or in the
2829
form
29-
"https://<your-ai-services-account-name>.services.ai.azure.com/api/projects/<your-project-name>"
30+
"`https://your-ai-services-account-name.services.ai.azure.com/api/projects/your-project-name
31+
<https://your-ai-services-account-name.services.ai.azure.com/api/projects/your-project-name>`_"
3032
if you want to explicitly
3133
specify the Foundry Project name. Required.
3234
:type endpoint: str
3335
:param credential: Credential used to authenticate requests to the service. Required.
3436
:type credential: ~azure.core.credentials.TokenCredential
3537
:keyword api_version: The API version to use for this operation. Default value is
36-
"2025-05-15-preview". Note that overriding this default value may result in unsupported
38+
"2025-11-15-preview". Note that overriding this default value may result in unsupported
3739
behavior.
3840
:paramtype api_version: str
3941
"""
4042

4143
def __init__(self, endpoint: str, credential: "TokenCredential", **kwargs: Any) -> None:
42-
api_version: str = kwargs.pop("api_version", "2025-05-15-preview")
44+
api_version: str = kwargs.pop("api_version", "2025-11-15-preview")
4345

4446
if endpoint is None:
4547
raise ValueError("Parameter 'endpoint' must not be None.")
@@ -50,7 +52,7 @@ def __init__(self, endpoint: str, credential: "TokenCredential", **kwargs: Any)
5052
self.credential = credential
5153
self.api_version = api_version
5254
self.credential_scopes = kwargs.pop("credential_scopes", ["https://ai.azure.com/.default"])
53-
kwargs.setdefault("sdk_moniker", "ai-projects-onedp/{}".format(VERSION))
55+
kwargs.setdefault("sdk_moniker", "ai-projects/{}".format(VERSION))
5456
self.polling_interval = kwargs.get("polling_interval", 30)
5557
self._configure(**kwargs)
5658

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/onedp/_utils/model_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# pylint: disable=too-many-lines
1+
# pylint: disable=line-too-long,useless-suppression,too-many-lines
22
# coding=utf-8
33
# --------------------------------------------------------------------------
44
# Copyright (c) Microsoft Corporation. All rights reserved.

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/onedp/_validation.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@
1010
def api_version_validation(**kwargs):
1111
params_added_on = kwargs.pop("params_added_on", {})
1212
method_added_on = kwargs.pop("method_added_on", "")
13+
api_versions_list = kwargs.pop("api_versions_list", [])
14+
15+
def _index_with_default(value: str, default: int = -1) -> int:
16+
"""Get the index of value in lst, or return default if not found.
17+
18+
:param value: The value to search for in the api_versions_list.
19+
:type value: str
20+
:param default: The default value to return if the value is not found.
21+
:type default: int
22+
:return: The index of the value in the list, or the default value if not found.
23+
:rtype: int
24+
"""
25+
try:
26+
return api_versions_list.index(value)
27+
except ValueError:
28+
return default
1329

1430
def decorator(func):
1531
@functools.wraps(func)
@@ -21,7 +37,7 @@ def wrapper(*args, **kwargs):
2137
except AttributeError:
2238
return func(*args, **kwargs)
2339

24-
if method_added_on > client_api_version:
40+
if _index_with_default(method_added_on) > _index_with_default(client_api_version):
2541
raise ValueError(
2642
f"'{func.__name__}' is not available in API version "
2743
f"{client_api_version}. Pass service API version {method_added_on} or newer to your client."
@@ -31,7 +47,7 @@ def wrapper(*args, **kwargs):
3147
parameter: api_version
3248
for api_version, parameters in params_added_on.items()
3349
for parameter in parameters
34-
if parameter in kwargs and api_version > client_api_version
50+
if parameter in kwargs and _index_with_default(api_version) > _index_with_default(client_api_version)
3551
}
3652
if unsupported:
3753
raise ValueError(

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/onedp/aio/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
if TYPE_CHECKING:
1313
from ._patch import * # pylint: disable=unused-wildcard-import
1414

15-
from ._client import AIProjectClient # type: ignore
15+
from ._client import ProjectsClient # type: ignore
1616

1717
try:
1818
from ._patch import __all__ as _patch_all
@@ -22,7 +22,7 @@
2222
from ._patch import patch_sdk as _patch_sdk
2323

2424
__all__ = [
25-
"AIProjectClient",
25+
"ProjectsClient",
2626
]
2727
__all__.extend([p for p in _patch_all if p not in __all__]) # pyright: ignore
2828

0 commit comments

Comments
 (0)