# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

import logging
import os

import numpy as np
import pandas as pd
import pytest
from devtools_testutils import AzureRecordedTestCase, recorded_by_proxy
from azure.ai.generative.evaluate import evaluate

logger = logging.getLogger(__name__)


@pytest.mark.e2etest
@pytest.mark.live_test_only
@pytest.mark.usefixtures("recorded_test")
class TestEvaluate(AzureRecordedTestCase):

    def test_evaluate_built_in_metrics(self, e2e_openai_api_base, e2e_openai_api_key, e2e_openai_completion_deployment_name, tmpdir):
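        """Run the built-in gpt_groundedness metric end to end against a live
        Azure OpenAI deployment and check that the aggregated summary matches
        the per-row results written to disk."""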
        test_data = [
            {"question": "How do you create a run?", "context": "AML API only",
             "answer": "To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment."},
            {"question": "How do you log a model?", "context": "Logging can be done using any OSS Sdk",
             "answer": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = './outputs/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`."},
        ]
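        # Each record supplies the question, the retrieval context, and the model's
        # answer; data_mapping below maps these fields onto the evaluator's inputs
        # ("question" -> questions, "context" -> contexts, "answer" -> y_pred).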

        with tmpdir.as_cwd():
            output_path = tmpdir + "/evaluation_output"

            result = evaluate(  # This logs metrics/artifacts using mlflow
                evaluation_name="rag-chat-1",
                data=test_data,
                task_type="qa",
                metrics_list=["gpt_groundedness"],
                model_config={
                    "api_version": "2023-07-01-preview",
                    "api_base": e2e_openai_api_base,
                    "api_type": "azure",
                    "api_key": e2e_openai_api_key,
                    "deployment_id": e2e_openai_completion_deployment_name,
                },
                data_mapping={
                    "questions": "question",
                    "contexts": "context",
                    "y_pred": "answer",
                    "y_test": "truth",
                },
                output_path=output_path,
            )

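            # evaluate() writes per-row scores to eval_results.jsonl under output_path;
            # the summary metric should equal the mean of those rows, ignoring NaNs.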
            metrics_summary = result.metrics_summary
            tabular_result = pd.read_json(os.path.join(output_path, "eval_results.jsonl"), lines=True)

            assert "mean_gpt_groundedness" in metrics_summary.keys()
            assert metrics_summary.get("mean_gpt_groundedness") == np.nanmean(tabular_result["gpt_groundedness"])