@@ -88,8 +88,18 @@ class TestMassEvaluate:
     - Multi-modal inputs: This one has some parameters for the different types of multi-modal inputs.
     """
 
-    @pytest.mark.skipif(not is_live(), reason="Skip in playback due to inconsistency in evaluation results.")
-    def test_evaluate_singleton_inputs(self, model_config, azure_cred, project_scope, data_file):
+    @pytest.mark.parametrize(
+        ("proj_scope", "cred", "conv", "m_config"),
+        (
+            ("project_scope", "azure_cred", "data_file", "model_config"),
+            # ("project_scope_onedp", "azure_cred_onedp", "data_file", "model_config_onedp"),
+        )
+    )
+    def test_evaluate_singleton_inputs(self, request, proj_scope, cred, conv, m_config):
+        project_scope = request.getfixturevalue(proj_scope)
+        azure_cred = request.getfixturevalue(cred)
+        data_file = request.getfixturevalue(conv)
+        model_config = request.getfixturevalue(m_config)
         # qa fails in playback but ONLY when using the pf proxy for some reason, and
         # using it without pf proxy causes CI to hang and timeout after 3 hours.
         evaluators = {
@@ -184,7 +194,7 @@ def test_evaluate_singleton_inputs(self, model_config, azure_cred, project_scope
         assert len(row_result_df["outputs.qa.similarity"]) == 3
         assert len(row_result_df["outputs.qa.gpt_similarity"]) == 3
 
-        assert len(metrics.keys()) == 62
+        assert len(metrics.keys()) == 76
         assert metrics["f1_score.f1_score"] >= 0
         assert metrics["gleu.gleu_score"] >= 0
         assert metrics["bleu.bleu_score"] >= 0
@@ -225,7 +235,19 @@ def test_evaluate_singleton_inputs(self, model_config, azure_cred, project_scope
         assert metrics["qa.similarity"] >= 0
         assert metrics["qa.gpt_similarity"] >= 0
 
-    def test_evaluate_conversation(self, model_config, data_convo_file, azure_cred, project_scope):
+    @pytest.mark.parametrize(
+        ("proj_scope", "cred", "conv", "m_config"),
+        (
+            ("project_scope", "azure_cred", "data_convo_file", "model_config"),
+            # ("project_scope_onedp", "azure_cred_onedp", "data_convo_file", "model_config_onedp"),
+        )
+    )
+    def test_evaluate_conversation(self, request, proj_scope, cred, conv, m_config):
+        project_scope = request.getfixturevalue(proj_scope)
+        azure_cred = request.getfixturevalue(cred)
+        data_convo_file = request.getfixturevalue(conv)
+        model_config = request.getfixturevalue(m_config)
+
         evaluators = {
             "grounded": GroundednessEvaluator(model_config),
             "coherence": CoherenceEvaluator(model_config),