
Commit df7dc2f

[evaluation] Add support for CSV files with evaluate() (Azure#38980)
* Add support for CSV files with evaluate()
* Remove changes to unrelated test
* Remove print statements
* Update CHANGELOG
* Create classes, clean up file loading
* Simplify error message
* Simplify error message
* Update error message
* Add test for unsupported file type
* Update get_loader to return JSONL by default (per Hanchi's comment on backcompat)
* Update codeclient to use new factory loader
* Update references to JSON only support
* Revert code client change
* Update test
* Update unit tests
* Fix tests and remove unnecessary changes
* Fix formatting errors
* Fix black errors
* Add case insensitivity to filename for get_loader
* Add test for malformed data input files
* Move e2e test to new file to avoid localtest fixture skipping
* Remove unused fixture
* Fix linting errors
* Move test back to test_evaluate
1 parent e578f59 commit df7dc2f

File tree: 9 files changed (+250, -6 lines)

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@
 ## 1.2.0 (Unreleased)
 
 ### Features Added
+- CSV files are now supported as data file inputs with `evaluate()` API. The CSV file should have a header row with column names that match the `data` and `target` fields in the `evaluate()` method and the filename should be passed as the `data` parameter.
 
 ### Breaking Changes
 
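For orientation, here is a minimal usage sketch of the behavior this changelog entry describes. The file name, columns, and evaluator are illustrative assumptions rather than part of the commit; only the fact that a `.csv` path can now be passed as `data` comes from the diff.

```python
# Hypothetical example: "qa_results.csv" is assumed to exist and to contain a
# header row whose column names match the evaluator's inputs, e.g.:
#   response,ground_truth
from azure.ai.evaluation import F1ScoreEvaluator, evaluate

result = evaluate(
    data="qa_results.csv",  # previously only .jsonl files were accepted here
    evaluators={"f1_score": F1ScoreEvaluator()},
)
print(result["metrics"])  # e.g. {"f1_score.f1_score": ...}
```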

sdk/evaluation/azure-ai-evaluation/assets.json

Lines changed: 1 addition & 1 deletion

@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/evaluation/azure-ai-evaluation",
-  "Tag": "python/evaluation/azure-ai-evaluation_857104bacb"
+  "Tag": "python/evaluation/azure-ai-evaluation_c314f525e0"
 }

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 6 additions & 4 deletions

@@ -33,6 +33,7 @@
     _log_metrics_and_instance_results,
     _trace_destination_from_project_scope,
     _write_output,
+    DataLoaderFactory,
 )
 
 TClient = TypeVar("TClient", ProxyClient, CodeClient)
@@ -431,10 +432,11 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
         )
 
     try:
-        initial_data_df = pd.read_json(data, lines=True)
+        data_loader = DataLoaderFactory.get_loader(data)
+        initial_data_df = data_loader.load()
     except Exception as e:
         raise EvaluationException(
-            message=f"Unable to load data from '{data}'. Please ensure the input is valid JSONL format. Detailed error: {e}.",
+            message=f"Unable to load data from '{data}'. Supported formats are JSONL and CSV. Detailed error: {e}.",
             target=ErrorTarget.EVALUATE,
             category=ErrorCategory.INVALID_VALUE,
             blame=ErrorBlame.USER_ERROR,
@@ -456,7 +458,7 @@ def _apply_target_to_data(
 
     :param target: The function to be applied to data.
     :type target: Callable
-    :param data: The path to input jsonl file.
+    :param data: The path to input jsonl or csv file.
     :type data: Union[str, os.PathLike]
     :param pf_client: The promptflow client to be used.
     :type pf_client: PFClient
@@ -578,7 +580,7 @@ def evaluate(
         data will be run through target function and then results will be evaluated.
 
     :keyword data: Path to the data to be evaluated or passed to target if target is set.
-        Only .jsonl format files are supported. `target` and `data` both cannot be None. Required.
+        JSONL and CSV files are supported. `target` and `data` both cannot be None. Required.
    :paramtype data: str
    :keyword evaluators: Evaluators to be used for evaluation. It should be a dictionary with key as alias for evaluator
        and value as the evaluator function. Required.
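A condensed sketch of the loading path shown in the hunk above, assuming `DataLoaderFactory` is importable from the private `_utils` module it is defined in; the standalone wrapper function and the `ValueError` stand-in for `EvaluationException` are illustrative simplifications, not the SDK's actual surface.

```python
# Sketch only: mirrors the try/except in _validate_and_load_data above.
import pandas as pd

from azure.ai.evaluation._evaluate._utils import DataLoaderFactory  # private module; path assumed


def load_input_data(data: str) -> pd.DataFrame:
    try:
        data_loader = DataLoaderFactory.get_loader(data)  # .csv -> CSVDataFileLoader, otherwise JSONL
        return data_loader.load()
    except Exception as e:
        # The real code raises EvaluationException with target=ErrorTarget.EVALUATE,
        # category=ErrorCategory.INVALID_VALUE, blame=ErrorBlame.USER_ERROR.
        raise ValueError(
            f"Unable to load data from '{data}'. Supported formats are JSONL and CSV. Detailed error: {e}."
        ) from e
```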

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py

Lines changed: 27 additions & 0 deletions

@@ -328,3 +328,30 @@ def set_event_loop_policy() -> None:
         # Reference: https://stackoverflow.com/questions/45600579/asyncio-event-loop-is-closed-when-getting-loop
         # On Windows seems to be a problem with EventLoopPolicy, use this snippet to work around it
         asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())  # type: ignore[attr-defined]
+
+
+class JSONLDataFileLoader:
+    def __init__(self, filename: Union[os.PathLike, str]):
+        self.filename = filename
+
+    def load(self) -> pd.DataFrame:
+        return pd.read_json(self.filename, lines=True)
+
+
+class CSVDataFileLoader:
+    def __init__(self, filename: Union[os.PathLike, str]):
+        self.filename = filename
+
+    def load(self) -> pd.DataFrame:
+        return pd.read_csv(self.filename)
+
+
+class DataLoaderFactory:
+    @staticmethod
+    def get_loader(filename: Union[os.PathLike, str]) -> Union[JSONLDataFileLoader, CSVDataFileLoader]:
+        filename_str = str(filename).lower()
+        if filename_str.endswith(".csv"):
+            return CSVDataFileLoader(filename)
+
+        # fallback to JSONL to maintain backward compatibility
+        return JSONLDataFileLoader(filename)
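To make the dispatch rules above concrete (case-insensitive `.csv` match, JSONL fallback for any other extension, per the back-compat note in the commit message), a small hedged sketch; the file names are hypothetical.

```python
# Dispatch behavior of DataLoaderFactory.get_loader; file names are made up.
from azure.ai.evaluation._evaluate._utils import (
    CSVDataFileLoader,
    DataLoaderFactory,
    JSONLDataFileLoader,
)

assert isinstance(DataLoaderFactory.get_loader("eval_data.CSV"), CSVDataFileLoader)    # extension check is lowercased
assert isinstance(DataLoaderFactory.get_loader("eval_data.jsonl"), JSONLDataFileLoader)
assert isinstance(DataLoaderFactory.get_loader("notes.txt"), JSONLDataFileLoader)      # unknown extensions fall back to JSONL

# Loading only happens when .load() is called, so a real file is needed at that point:
# df = DataLoaderFactory.get_loader("eval_data.csv").load()
```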
Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+query,context,response,ground_truth
+How do you create a run?,AML API only,"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:
+
+```
+from azureml.core import Experiment, Run
+from azureml.core.workspace import Workspace
+
+# Define workspace and experiment
+ws = Workspace.from_config()
+exp = Experiment(workspace=ws, name='my_experiment')
+
+# Create a new run
+run = exp.start_logging()
+```
+
+In this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.",Paris is the capital of France.
+How do you log a model?,Logging can be done using any OSS Sdk,"There are a few ways to log models in Azure Machine Learning.
+
+One way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:
+
+```python
+from azureml.core import Model
+
+model_path = './outputs/my_model.pkl'
+model = Model.register(workspace=ws, model_path=model_path, model_name='my_model')
+```
+
+This code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`.
+
+Another way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:
+
+```python
+from sklearn.linear_model import LogisticRegression
+from azureml.core.run import Run
+
+# Initialize a run object
+run = Run.get_context()
+
+# Train your model
+X_train, y_train = ...
+clf = LogisticRegression().fit(X_train, y_train)
+
+# Save the model to the Run object's outputs directory
+model_path = 'outputs/model.pkl'
+joblib.dump(value=clf, filename=model_path)
+
+# Log the model as a run artifact
+run.upload_file(name=model_path, path_or_stream=model_path)
+```
+
+In this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.",Paris is the capital of France.
+"What is the capital of France?`''""</>{}{{]",France is in Europe,Paris is the capital of France.,Paris is the capital of France.

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_evaluate.py

Lines changed: 69 additions & 0 deletions

@@ -17,6 +17,12 @@
 from azure.ai.evaluation._azure._clients import LiteMLClient
 
 
+@pytest.fixture
+def csv_file():
+    data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
+    return os.path.join(data_path, "evaluate_test_data.csv")
+
+
 @pytest.fixture
 def data_file():
     data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
@@ -431,3 +437,66 @@ def test_evaluate_aggregation(self, data_file, return_json, aggregate_return_jso
     @pytest.mark.skip(reason="TODO: Add test back")
     def test_prompty_with_threadpool_implementation(self):
         pass
+
+    def test_evaluate_with_csv_data(self, csv_file, data_file):
+        def remove_whitespace(s):
+            import re
+
+            return re.sub(r"\s+", "", s)
+
+        # load identical data files in different formats
+        jsonl_input_data = pd.read_json(data_file, lines=True)
+        csv_input_data = pd.read_csv(csv_file)
+
+        # create evaluator
+        f1_score_eval = F1ScoreEvaluator()
+
+        # run the evaluation on jsonl data
+        jsonl_result = evaluate(
+            data=data_file,
+            evaluators={"f1_score": f1_score_eval},
+        )
+
+        jsonl_row_result_df = pd.DataFrame(jsonl_result["rows"])
+        jsonl_metrics = jsonl_result["metrics"]
+
+        # run the evaluation on csv data
+        csv_result = evaluate(
+            data=csv_file,
+            evaluators={"f1_score": f1_score_eval},
+        )
+
+        csv_row_result_df = pd.DataFrame(csv_result["rows"])
+        csv_metrics = csv_result["metrics"]
+
+        # validate the results
+        assert jsonl_result["metrics"] == csv_result["metrics"]
+        assert jsonl_result["rows"][0]["inputs.context"] == csv_result["rows"][0]["inputs.context"]
+        assert jsonl_result["rows"][0]["inputs.query"] == csv_result["rows"][0]["inputs.query"]
+        assert jsonl_result["rows"][0]["inputs.ground_truth"] == csv_result["rows"][0]["inputs.ground_truth"]
+        assert remove_whitespace(jsonl_result["rows"][0]["inputs.response"]) == remove_whitespace(
+            csv_result["rows"][0]["inputs.response"]
+        )
+        assert (
+            jsonl_row_result_df.shape[0] == len(jsonl_input_data) == csv_row_result_df.shape[0] == len(csv_input_data)
+        )
+
+        assert "outputs.f1_score.f1_score" in jsonl_row_result_df.columns.to_list()
+        assert "outputs.f1_score.f1_score" in csv_row_result_df.columns.to_list()
+
+        assert "f1_score.f1_score" in jsonl_metrics.keys()
+        assert "f1_score.f1_score" in csv_metrics.keys()
+
+        assert jsonl_metrics.get("f1_score.f1_score") == list_mean_nan_safe(
+            jsonl_row_result_df["outputs.f1_score.f1_score"]
+        )
+        assert csv_metrics.get("f1_score.f1_score") == list_mean_nan_safe(
+            csv_row_result_df["outputs.f1_score.f1_score"]
+        )
+
+        assert (
+            jsonl_row_result_df["outputs.f1_score.f1_score"][2]
+            == csv_row_result_df["outputs.f1_score.f1_score"][2]
+            == 1
+        )
+        assert jsonl_result["studio_url"] == csv_result["studio_url"] == None
Lines changed: 51 additions & 0 deletions

@@ -0,0 +1,51 @@
+How do you create a run?,AML API only,"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:
+
+```
+from azureml.core import Experiment, Run
+from azureml.core.workspace import Workspace
+
+# Define workspace and experiment
+ws = Workspace.from_config()
+exp = Experiment(workspace=ws, name='my_experiment')
+
+# Create a new run
+run = exp.start_logging()
+```
+
+In this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.",Paris is the capital of France.
+How do you log a model?,Logging can be done using any OSS Sdk,"There are a few ways to log models in Azure Machine Learning.
+
+One way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:
+
+```python
+from azureml.core import Model
+
+model_path = './outputs/my_model.pkl'
+model = Model.register(workspace=ws, model_path=model_path, model_name='my_model')
+```
+
+This code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`.
+
+Another way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:
+
+```python
+from sklearn.linear_model import LogisticRegression
+from azureml.core.run import Run
+
+# Initialize a run object
+run = Run.get_context()
+
+# Train your model
+X_train, y_train = ...
+clf = LogisticRegression().fit(X_train, y_train)
+
+# Save the model to the Run object's outputs directory
+model_path = 'outputs/model.pkl'
+joblib.dump(value=clf, filename=model_path)
+
+# Log the model as a run artifact
+run.upload_file(name=model_path, path_or_stream=model_path)
+```
+
+In this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.",Paris is the capital of France.
+"What is the capital of France?`''""</>{}{{]",France is in Europe,Paris is the capital of France.,Paris is the capital of France.
Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+{"query":"How do you create a run?","context":"AML API only","response":"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.", "ground_truth":"Paris is the capital of France."}
+{"query":"How do you log a model?","context":"Logging can be done using any OSS Sdk","response":"There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = '.\/outputs\/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs\/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.","ground_truth":"Paris is the capital of France."}
+{"query":"What is the capital of France?",

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 40 additions & 1 deletion

@@ -14,6 +14,7 @@
     ContentSafetyEvaluator,
     F1ScoreEvaluator,
     GroundednessEvaluator,
+    SimilarityEvaluator,
     ProtectedMaterialEvaluator,
     evaluate,
 )
@@ -34,6 +35,16 @@ def _get_file(name):
     return os.path.join(data_path, name)
 
 
+@pytest.fixture
+def unsupported_file_type():
+    return _get_file("unsupported_file_type.txt")
+
+
+@pytest.fixture
+def missing_header_csv_file():
+    return _get_file("no_header_evaluate_test_data.csv")
+
+
 @pytest.fixture
 def invalid_jsonl_file():
     return _get_file("invalid_evaluate_test_data.jsonl")
@@ -157,7 +168,7 @@ def test_evaluate_invalid_jsonl_data(self, mock_model_config, invalid_jsonl_file
         )
 
         assert "Unable to load data from " in exc_info.value.args[0]
-        assert "Please ensure the input is valid JSONL format." in exc_info.value.args[0]
+        assert "Supported formats are JSONL and CSV. Detailed error:" in exc_info.value.args[0]
 
     def test_evaluate_missing_required_inputs(self, missing_columns_jsonl_file):
         with pytest.raises(EvaluationException) as exc_info:
@@ -678,3 +689,31 @@ def test_optional_inputs_with_target(self, questions_file, questions_answers_bas
         )  # type: ignore
         assert double_override_results["rows"][0]["outputs.echo.echo_query"] == "new query"
         assert double_override_results["rows"][0]["outputs.echo.echo_response"] == "new response"
+
+    def test_unsupported_file_inputs(self, mock_model_config, unsupported_file_type):
+        with pytest.raises(EvaluationException) as cm:
+            evaluate(
+                data=unsupported_file_type,
+                evaluators={"groundedness": GroundednessEvaluator(model_config=mock_model_config)},
+            )
+        assert "Unable to load data from " in cm.value.args[0]
+        assert "Supported formats are JSONL and CSV. Detailed error:" in cm.value.args[0]
+
+    def test_malformed_file_inputs(self, model_config, missing_header_csv_file, missing_columns_jsonl_file):
+        with pytest.raises(EvaluationException) as exc_info:
+            evaluate(
+                data=missing_columns_jsonl_file,
+                evaluators={"similarity": SimilarityEvaluator(model_config=model_config)},
+                fail_on_evaluator_errors=True,
+            )
+
+        assert "Either 'conversation' or individual inputs must be provided." in str(exc_info.value)
+
+        with pytest.raises(EvaluationException) as exc_info:
+            evaluate(
+                data=missing_header_csv_file,
+                evaluators={"similarity": SimilarityEvaluator(model_config=model_config)},
+                fail_on_evaluator_errors=True,
+            )
+
+        assert "Either 'conversation' or individual inputs must be provided." in str(exc_info.value)
