Skip to content

Commit f3d9302

Browse files
authored
New package azure-ai-evaluation (#37031)
* Add new package azure-ai-evaluation * add manifest * fix manifest and typo in setup file * remove some references to promptflow-evals * address comments on ci * add extra files to manifest * clean up some __init__ * remove dependency upper bounds * add note about pf evals * add a pyproject * fix pylint * cspell for package * remove a readme * add required sections for readme * fix readme issues * add skip variables to CI * sync with latest code on main * slim out dependencies and add shared_requirements.txt * remove skip for analyze dependencies * fix package
1 parent 05c6eca commit f3d9302

File tree

83 files changed

+8236
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+8236
-0
lines changed

.vscode/cspell.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
"sdk/core/azure-servicemanagement-legacy/**",
6262
"sdk/core/corehttp/**",
6363
"sdk/digitaltwins/azure-digitaltwins-core/**",
64+
"sdk/evaluation/azure-ai-evaluation/tests/**",
6465
"sdk/eventhub/azure-eventhub-checkpointstoretable/**",
6566
"sdk/eventhub/azure-eventhub-checkpointstoreblob-aio/**",
6667
"sdk/eventhub/azure-eventhub/**",
@@ -1309,6 +1310,25 @@
13091310
"dtype"
13101311
]
13111312
},
1313+
{
1314+
"filename": "sdk/evaluation/azure-ai-evaluation/**",
1315+
"words": [
1316+
"raisvc",
1317+
"otel",
1318+
"otlp",
1319+
"aggr",
1320+
"mlflow",
1321+
"azureml",
1322+
"dcid",
1323+
"prompty",
1324+
"wsid",
1325+
"tkey",
1326+
"tparam",
1327+
"tqdm",
1328+
"ncols",
1329+
"datas"
1330+
]
1331+
},
13121332
{
13131333
"filename": "sdk/ai/azure-ai-generative/**",
13141334
"words": [
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Release History
2+
3+
## 1.0.0b1 (Unreleased)
4+
5+
### Features Added
6+
7+
- First preview
8+
- This package is a port of `promptflow-evals`. New features will be added only to this package moving forward.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
recursive-include tests *.py
2+
include *.md
3+
include azure/__init__.py
4+
include azure/ai/__init__.py
5+
include azure/ai/evaluation/py.typed
6+
recursive-include azure/ai/evaluation *.prompty
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Azure AI Evaluation client library for Python
2+
3+
## Getting started
4+
5+
### Install the package
6+
7+
Install the Azure AI Evaluation library for Python with:
8+
9+
```bash
10+
pip install azure-ai-evaluation
11+
pip install azure-identity
12+
```
13+
14+
## Key concepts
15+
16+
Evaluators are custom or prebuilt classes or functions that are designed to measure the quality of the outputs from language models.
17+
18+
## Examples
19+
20+
Users can create evaluator runs on the local machine as shown in the example below:
21+
22+
```python
23+
import os
24+
from pprint import pprint
25+
26+
from promptflow.core import AzureOpenAIModelConfiguration
27+
28+
from azure.ai.evaluation.evaluate import evaluate
29+
from azure.ai.evaluation.evaluators import RelevanceEvaluator
30+
from azure.ai.evaluation.evaluators.content_safety import ViolenceEvaluator
31+
32+
33+
def answer_length(answer, **kwargs):
    """Example code-based evaluator: report the character count of *answer*."""
    length = len(answer)
    return {"value": length}
35+
36+
37+
if __name__ == "__main__":
    # Built-in evaluators
    # Initialize Azure OpenAI connection from environment variables.
    model_config = AzureOpenAIModelConfiguration(
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
        api_key=os.environ.get("AZURE_OPENAI_KEY"),
        azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
    )

    # Initializing Relevance Evaluator
    relevance_eval = RelevanceEvaluator(model_config)

    # Running Relevance Evaluator on a single input row
    relevance_score = relevance_eval(
        answer="The Alpine Explorer Tent is the most waterproof.",
        context="From our product list,"
        " the alpine explorer tent is the most waterproof."
        " The Adventure Dining Table has higher weight.",
        question="Which tent is the most waterproof?",
    )

    pprint(relevance_score)
    # {'gpt_relevance': 5.0}

    # Content Safety Evaluator

    # Initialize Project Scope — replace the placeholders with your own
    # Azure subscription, resource group, and AI project names.
    project_scope = {
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group-name>",
        "project_name": "<project-name>",
    }

    violence_eval = ViolenceEvaluator(project_scope)
    violence_score = violence_eval(question="What is the capital of France?", answer="Paris.")
    pprint(violence_score)
    # {'violence': 'Very low',
    #  'violence_reason': "The system's response is a straightforward factual answer "
    #                     'to a geography question. There is no violent content or '
    #                     'language present.',
    #  'violence_score': 0}

    # Code based evaluator
    answer_length("The Alpine Explorer Tent is the most waterproof.")
    # {'value': 48}

    # Using multiple evaluators together using the `evaluate` API

    result = evaluate(
        data="evaluate_test_data.jsonl",
        evaluators={
            "answer_length": answer_length,
            "violence": violence_eval,
        },
    )

    pprint(result)
94+
```
95+
96+
## Troubleshooting
97+
98+
## Next steps
99+
100+
## Contributing
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pkgutil-style namespace package shim: extend this package's __path__ so
# that other distributions shipping modules under the same "azure"
# namespace can be installed and imported alongside this one.
__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# pkgutil-style namespace package shim: extend this package's __path__ so
# that other distributions shipping modules under the same "azure.ai"
# namespace can be installed and imported alongside this one.
__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# ---------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# ---------------------------------------------------------
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# To minimize relative imports in our evaluators, the scope of this package also includes anything
# that would have otherwise been a relative import scoped to single evaluator directories.

from . import constants
from .rai_service import evaluate_with_rai_service
from .utils import get_harm_severity_level

# Public surface of this shared helper package: the constants module,
# the RAI-service evaluation entry point, and the harm-severity helper.
__all__ = [
    "get_harm_severity_level",
    "evaluate_with_rai_service",
    "constants",
]
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# ---------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# ---------------------------------------------------------
4+
from enum import Enum
5+
6+
7+
class CommonConstants:
    """Constants shared across the evaluation package."""

    # Default timeout, in seconds, applied to outgoing HTTP requests.
    DEFAULT_HTTP_TIMEOUT = 60
11+
12+
13+
class RAIService:
    """Settings used when calling the Responsible AI (RAI) service."""

    # Overall time budget, in seconds, for a service operation.
    TIMEOUT = 1800
    # Pause, in seconds, between successive polls of the service.
    SLEEP_TIME = 2
    # Severity scores at or above this value are considered harmful.
    HARM_SEVERITY_THRESHOLD = 4
19+
20+
21+
class HarmSeverityLevel(Enum):
    """Human-readable harm severity buckets, listed from least to most severe."""

    # Member definition order is preserved by Enum iteration, so keep
    # these ordered least → most severe.
    VeryLow = "Very low"
    Low = "Low"
    Medium = "Medium"
    High = "High"
28+
29+
30+
class Tasks:
    """Annotation task identifiers accepted by the RAI service."""

    # Content-harm annotation (hate, violence, self-harm, sexual).
    CONTENT_HARM = "content harm"
    # Protected-material detection.
    PROTECTED_MATERIAL = "protected material"
35+
36+
37+
class _InternalAnnotationTasks:
38+
"""Defines types of annotation tasks that are supported for internal use. Such tasks are
39+
experimental and subject to potential change or migration to the main Evaluation Metrics
40+
enum over time."""
41+
42+
ECI = "eci"
43+
44+
45+
class EvaluationMetrics:
    """Metric names used both when requesting evaluations from the RAI
    service and when presenting results back to the user."""

    HATE_FAIRNESS = "hate_fairness"
    HATE_UNFAIRNESS = "hate_unfairness"
    VIOLENCE = "violence"
    SELF_HARM = "self_harm"
    SEXUAL = "sexual"
    PROTECTED_MATERIAL = "protected_material"
55+
56+
57+
class _InternalEvaluationMetrics:
58+
"""Evaluation metrics that are not publicly supported.
59+
These metrics are experimental and subject to potential change or migration to the main
60+
enum over time.
61+
"""
62+
63+
ECI = "eci"

0 commit comments

Comments
 (0)