
Commit df7dc2f

[evaluation] Add support for CSV files with evaluate() (Azure#38980)
* Add support for CSV files with evaluate()
* Remove changes to unrelated test
* Remove print statements
* Update CHANGELOG
* Create classes, clean up file loading
* Simplify error message
* Simplify error message
* Update error message
* Add test for unsupported file type
* Update get_loader to return JSONL by default (per Hanchi's comment on backcompat)
* Update codeclient to use new factory loader
* Update references to JSON only support
* Revert code client change
* Update test
* Update unit tests
* Fix tests and remove unnecessary changes
* Fix formatting errors
* Fix black errors
* Add case insensitivity to filename for get_loader
* Add test for malformed data input files
* Move e2e test to new file to avoid localtest fixture skipping
* Remove unused fixture
* Fix linting errors
* Move test back to test_evaluate
1 parent e578f59 commit df7dc2f

File tree: 9 files changed (+250, -6 lines)

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@
 ## 1.2.0 (Unreleased)
 
 ### Features Added
+- CSV files are now supported as data file inputs with `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter.
 
 ### Breaking Changes
 
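For orientation, here is a minimal usage sketch of the behavior this changelog entry describes. The file name, columns, and evaluator are illustrative assumptions rather than part of the commit; only the fact that a `.csv` path can now be passed as `data` comes from the diff.

```python
# Hypothetical example: "qa_results.csv" is assumed to exist and to contain a
# header row whose column names match the evaluator's inputs, e.g.:
#   response,ground_truth
from azure.ai.evaluation import F1ScoreEvaluator, evaluate

result = evaluate(
    data="qa_results.csv",  # previously only .jsonl files were accepted here
    evaluators={"f1_score": F1ScoreEvaluator()},
)
print(result["metrics"])  # e.g. {"f1_score.f1_score": ...}
```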

sdk/evaluation/azure-ai-evaluation/assets.json

Lines changed: 1 addition & 1 deletion

@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/evaluation/azure-ai-evaluation",
-  "Tag": "python/evaluation/azure-ai-evaluation_857104bacb"
+  "Tag": "python/evaluation/azure-ai-evaluation_c314f525e0"
 }

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 6 additions & 4 deletions

@@ -33,6 +33,7 @@
     _log_metrics_and_instance_results,
     _trace_destination_from_project_scope,
     _write_output,
+    DataLoaderFactory,
 )
 
 TClient = TypeVar("TClient", ProxyClient, CodeClient)
@@ -431,10 +432,11 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
         )
 
     try:
-        initial_data_df = pd.read_json(data, lines=True)
+        data_loader = DataLoaderFactory.get_loader(data)
+        initial_data_df = data_loader.load()
     except Exception as e:
         raise EvaluationException(
-            message=f"Unable to load data from '{data}'. Please ensure the input is valid JSONL format. Detailed error: {e}.",
+            message=f"Unable to load data from '{data}'. Supported formats are JSONL and CSV. Detailed error: {e}.",
             target=ErrorTarget.EVALUATE,
             category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
@@ -456,7 +458,7 @@ def _apply_target_to_data(
 
     :param target: The function to be applied to data.
     :type target: Callable
-    :param data: The path to input jsonl file.
+    :param data: The path to input jsonl or csv file.
     :type data: Union[str, os.PathLike]
     :param pf_client: The promptflow client to be used.
     :type pf_client: PFClient
@@ -578,7 +580,7 @@ def evaluate(
         data will be run through target function and then results will be evaluated.
 
     :keyword data: Path to the data to be evaluated or passed to target if target is set.
-        Only .jsonl format files are supported. `target` and `data` both cannot be None. Required.
+        JSONL and CSV files are supported. `target` and `data` both cannot be None. Required.
    :paramtype data: str
    :keyword evaluators: Evaluators to be used for evaluation. It should be a dictionary with key as alias for evaluator
        and value as the evaluator function. Required.
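A condensed sketch of the loading path shown in the hunk above, assuming `DataLoaderFactory` is importable from the private `_utils` module it is defined in; the standalone wrapper function and the `ValueError` stand-in for `EvaluationException` are illustrative simplifications, not the SDK's actual surface.

```python
# Sketch only: mirrors the try/except in _validate_and_load_data above.
import pandas as pd

from azure.ai.evaluation._evaluate._utils import DataLoaderFactory  # private module; path assumed


def load_input_data(data: str) -> pd.DataFrame:
    try:
        data_loader = DataLoaderFactory.get_loader(data)  # .csv -> CSVDataFileLoader, otherwise JSONL
        return data_loader.load()
    except Exception as e:
        # The real code raises EvaluationException with target=ErrorTarget.EVALUATE,
        # category=ErrorCategory.INVALID_VALUE, blame=ErrorBlame.USER_ERROR.
        raise ValueError(
            f"Unable to load data from '{data}'. Supported formats are JSONL and CSV. Detailed error: {e}."
        ) from e
```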

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py

Lines changed: 27 additions & 0 deletions

@@ -328,3 +328,30 @@ def set_event_loop_policy() -> None:
         # Reference: https://stackoverflow.com/questions/45600579/asyncio-event-loop-is-closed-when-getting-loop
         # On Windows seems to be a problem with EventLoopPolicy, use this snippet to work around it
         asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())  # type: ignore[attr-defined]
+
+
+class JSONLDataFileLoader:
+    def __init__(self, filename: Union[os.PathLike, str]):
+        self.filename = filename
+
+    def load(self) -> pd.DataFrame:
+        return pd.read_json(self.filename, lines=True)
+
+
+class CSVDataFileLoader:
+    def __init__(self, filename: Union[os.PathLike, str]):
+        self.filename = filename
+
+    def load(self) -> pd.DataFrame:
+        return pd.read_csv(self.filename)
+
+
+class DataLoaderFactory:
+    @staticmethod
+    def get_loader(filename: Union[os.PathLike, str]) -> Union[JSONLDataFileLoader, CSVDataFileLoader]:
+        filename_str = str(filename).lower()
+        if filename_str.endswith(".csv"):
+            return CSVDataFileLoader(filename)
+
+        # fallback to JSONL to maintain backward compatibility
+        return JSONLDataFileLoader(filename)
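To make the dispatch rules above concrete (case-insensitive `.csv` match, JSONL fallback for any other extension, per the back-compat note in the commit message), a small hedged sketch; the file names are hypothetical.

```python
# Dispatch behavior of DataLoaderFactory.get_loader; file names are made up.
from azure.ai.evaluation._evaluate._utils import (
    CSVDataFileLoader,
    DataLoaderFactory,
    JSONLDataFileLoader,
)

assert isinstance(DataLoaderFactory.get_loader("eval_data.CSV"), CSVDataFileLoader)    # extension check is lowercased
assert isinstance(DataLoaderFactory.get_loader("eval_data.jsonl"), JSONLDataFileLoader)
assert isinstance(DataLoaderFactory.get_loader("notes.txt"), JSONLDataFileLoader)      # unknown extensions fall back to JSONL

# Loading only happens when .load() is called, so a real file is needed at that point:
# df = DataLoaderFactory.get_loader("eval_data.csv").load()
```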
Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+query,context,response,ground_truth
+How do you create a run?,AML API only,"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:
+
+```
+from azureml.core import Experiment, Run
+from azureml.core.workspace import Workspace
+
+# Define workspace and experiment
+ws = Workspace.from_config()
+exp = Experiment(workspace=ws, name='my_experiment')
+
+# Create a new run
+run = exp.start_logging()
+```
+
+In this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.",Paris is the capital of France.
+How do you log a model?,Logging can be done using any OSS Sdk,"There are a few ways to log models in Azure Machine Learning.
+
+One way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:
+
+```python
+from azureml.core import Model
+
+model_path = './outputs/my_model.pkl'
+model = Model.register(workspace=ws, model_path=model_path, model_name='my_model')
+```
+
+This code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`.
+
+Another way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:
+
+```python
+from sklearn.linear_model import LogisticRegression
+from azureml.core.run import Run
+
+# Initialize a run object
+run = Run.get_context()
+
+# Train your model
+X_train, y_train = ...
+clf = LogisticRegression().fit(X_train, y_train)
+
+# Save the model to the Run object's outputs directory
+model_path = 'outputs/model.pkl'
+joblib.dump(value=clf, filename=model_path)
+
+# Log the model as a run artifact
+run.upload_file(name=model_path, path_or_stream=model_path)
+```
+
+In this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.",Paris is the capital of France.
+"What is the capital of France?`''""</>{}{{]",France is in Europe,Paris is the capital of France.,Paris is the capital of France.

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py

Lines changed: 69 additions & 0 deletions

@@ -17,6 +17,12 @@
 from azure.ai.evaluation._azure._clients import LiteMLClient
 
 
+@pytest.fixture
+def csv_file():
+    data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
+    return os.path.join(data_path, "evaluate_test_data.csv")
+
+
 @pytest.fixture
 def data_file():
     data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
@@ -431,3 +437,66 @@ def test_evaluate_aggregation(self, data_file, return_json, aggregate_return_jso
     @pytest.mark.skip(reason="TODO: Add test back")
     def test_prompty_with_threadpool_implementation(self):
         pass
+
+    def test_evaluate_with_csv_data(self, csv_file, data_file):
+        def remove_whitespace(s):
+            import re
+
+            return re.sub(r"\s+", "", s)
+
+        # load identical data files in different formats
+        jsonl_input_data = pd.read_json(data_file, lines=True)
+        csv_input_data = pd.read_csv(csv_file)
+
+        # create evaluator
+        f1_score_eval = F1ScoreEvaluator()
+
+        # run the evaluation on jsonl data
+        jsonl_result = evaluate(
+            data=data_file,
+            evaluators={"f1_score": f1_score_eval},
+        )
+
+        jsonl_row_result_df = pd.DataFrame(jsonl_result["rows"])
+        jsonl_metrics = jsonl_result["metrics"]
+
+        # run the evaluation on csv data
+        csv_result = evaluate(
+            data=csv_file,
+            evaluators={"f1_score": f1_score_eval},
+        )
+
+        csv_row_result_df = pd.DataFrame(csv_result["rows"])
+        csv_metrics = csv_result["metrics"]
+
+        # validate the results
+        assert jsonl_result["metrics"] == csv_result["metrics"]
+        assert jsonl_result["rows"][0]["inputs.context"] == csv_result["rows"][0]["inputs.context"]
+        assert jsonl_result["rows"][0]["inputs.query"] == csv_result["rows"][0]["inputs.query"]
+        assert jsonl_result["rows"][0]["inputs.ground_truth"] == csv_result["rows"][0]["inputs.ground_truth"]
+        assert remove_whitespace(jsonl_result["rows"][0]["inputs.response"]) == remove_whitespace(
+            csv_result["rows"][0]["inputs.response"]
+        )
+        assert (
+            jsonl_row_result_df.shape[0] == len(jsonl_input_data) == csv_row_result_df.shape[0] == len(csv_input_data)
+        )
+
+        assert "outputs.f1_score.f1_score" in jsonl_row_result_df.columns.to_list()
+        assert "outputs.f1_score.f1_score" in csv_row_result_df.columns.to_list()
+
+        assert "f1_score.f1_score" in jsonl_metrics.keys()
+        assert "f1_score.f1_score" in csv_metrics.keys()
+
+        assert jsonl_metrics.get("f1_score.f1_score") == list_mean_nan_safe(
+            jsonl_row_result_df["outputs.f1_score.f1_score"]
+        )
+        assert csv_metrics.get("f1_score.f1_score") == list_mean_nan_safe(
+            csv_row_result_df["outputs.f1_score.f1_score"]
+        )
+
+        assert (
+            jsonl_row_result_df["outputs.f1_score.f1_score"][2]
+            == csv_row_result_df["outputs.f1_score.f1_score"][2]
+            == 1
+        )
+        assert jsonl_result["studio_url"] == csv_result["studio_url"] == None
Lines changed: 51 additions & 0 deletions

@@ -0,0 +1,51 @@
+How do you create a run?,AML API only,"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:
+
+```
+from azureml.core import Experiment, Run
+from azureml.core.workspace import Workspace
+
+# Define workspace and experiment
+ws = Workspace.from_config()
+exp = Experiment(workspace=ws, name='my_experiment')
+
+# Create a new run
+run = exp.start_logging()
+```
+
+In this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.",Paris is the capital of France.
+How do you log a model?,Logging can be done using any OSS Sdk,"There are a few ways to log models in Azure Machine Learning.
+
+One way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:
+
+```python
+from azureml.core import Model
+
+model_path = './outputs/my_model.pkl'
+model = Model.register(workspace=ws, model_path=model_path, model_name='my_model')
+```
+
+This code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`.
+
+Another way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:
+
+```python
+from sklearn.linear_model import LogisticRegression
+from azureml.core.run import Run
+
+# Initialize a run object
+run = Run.get_context()
+
+# Train your model
+X_train, y_train = ...
+clf = LogisticRegression().fit(X_train, y_train)
+
+# Save the model to the Run object's outputs directory
+model_path = 'outputs/model.pkl'
+joblib.dump(value=clf, filename=model_path)
+
+# Log the model as a run artifact
+run.upload_file(name=model_path, path_or_stream=model_path)
+```
+
+In this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.",Paris is the capital of France.
+"What is the capital of France?`''""</>{}{{]",France is in Europe,Paris is the capital of France.,Paris is the capital of France.
Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+{"query":"How do you create a run?","context":"AML API only","response":"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.", "ground_truth":"Paris is the capital of France."}
+{"query":"How do you log a model?","context":"Logging can be done using any OSS Sdk","response":"There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = '.\/outputs\/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs\/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.","ground_truth":"Paris is the capital of France."}
+{"query":"What is the capital of France?",

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 40 additions & 1 deletion

@@ -14,6 +14,7 @@
     ContentSafetyEvaluator,
     F1ScoreEvaluator,
     GroundednessEvaluator,
+    SimilarityEvaluator,
     ProtectedMaterialEvaluator,
     evaluate,
 )
@@ -34,6 +35,16 @@ def _get_file(name):
     return os.path.join(data_path, name)
 
 
+@pytest.fixture
+def unsupported_file_type():
+    return _get_file("unsupported_file_type.txt")
+
+
+@pytest.fixture
+def missing_header_csv_file():
+    return _get_file("no_header_evaluate_test_data.csv")
+
+
 @pytest.fixture
 def invalid_jsonl_file():
     return _get_file("invalid_evaluate_test_data.jsonl")
@@ -157,7 +168,7 @@ def test_evaluate_invalid_jsonl_data(self, mock_model_config, invalid_jsonl_file
         )
 
         assert "Unable to load data from " in exc_info.value.args[0]
-        assert "Please ensure the input is valid JSONL format." in exc_info.value.args[0]
+        assert "Supported formats are JSONL and CSV. Detailed error:" in exc_info.value.args[0]
 
     def test_evaluate_missing_required_inputs(self, missing_columns_jsonl_file):
         with pytest.raises(EvaluationException) as exc_info:
@@ -678,3 +689,31 @@ def test_optional_inputs_with_target(self, questions_file, questions_answers_bas
         )  # type: ignore
         assert double_override_results["rows"][0]["outputs.echo.echo_query"] == "new query"
         assert double_override_results["rows"][0]["outputs.echo.echo_response"] == "new response"
+
+    def test_unsupported_file_inputs(self, mock_model_config, unsupported_file_type):
+        with pytest.raises(EvaluationException) as cm:
+            evaluate(
+                data=unsupported_file_type,
+                evaluators={"groundedness": GroundednessEvaluator(model_config=mock_model_config)},
+            )
+        assert "Unable to load data from " in cm.value.args[0]
+        assert "Supported formats are JSONL and CSV. Detailed error:" in cm.value.args[0]
+
+    def test_malformed_file_inputs(self, model_config, missing_header_csv_file, missing_columns_jsonl_file):
+        with pytest.raises(EvaluationException) as exc_info:
+            evaluate(
+                data=missing_columns_jsonl_file,
+                evaluators={"similarity": SimilarityEvaluator(model_config=model_config)},
+                fail_on_evaluator_errors=True,
+            )
+
+        assert "Either 'conversation' or individual inputs must be provided." in str(exc_info.value)
+
+        with pytest.raises(EvaluationException) as exc_info:
+            evaluate(
+                data=missing_header_csv_file,
+                evaluators={"similarity": SimilarityEvaluator(model_config=model_config)},
+                fail_on_evaluator_errors=True,
+            )
+
+        assert "Either 'conversation' or individual inputs must be provided." in str(exc_info.value)
