Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions model/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,30 @@ def save_results_files(results: dict, params: dict) -> list:
os.makedirs(path, exist_ok=True)

return [
*[_save_parquet_file(path, k, v) for k, v in results.items()],
*[_save_parquet_file(path, k, v, params) for k, v in results.items()],
_save_params_file(path, params),
]


def _save_parquet_file(path: str, results_name: str, df: pd.DataFrame) -> str:
def _add_metadata_to_dataframe(df: pd.DataFrame, params: dict) -> pd.DataFrame:
"""Add metadata as columns to the dataframe, so that the saved parquet files have useful information regarding their provenance

:param df: The dataframe that we want to add the metadata to
:type df: pd.DataFrame
:param params: The parameters for the model run, which include metadata
:type params: dict
:return: The dataframe, with additional columns "dataset", "scenario" and "create_datetime"
:rtype: pd.DataFrame
"""
metadata_to_save = ["dataset", "scenario", "app_version", "create_datetime"]
for m in metadata_to_save:
df[m] = params[m]
return df


def _save_parquet_file(
path: str, results_name: str, df: pd.DataFrame, params: dict
) -> str:
"""Save a results dataframe as parquet

:param path: the folder where we want to save the results to
Expand All @@ -201,6 +219,7 @@ def _save_parquet_file(path: str, results_name: str, df: pd.DataFrame) -> str:
:return: the filename of the saved file
:rtype: str
"""
df = _add_metadata_to_dataframe(df, params)
df.to_parquet(filename := f"{path}/{results_name}.parquet")
return filename

Expand Down
39 changes: 35 additions & 4 deletions tests/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
combine_results,
generate_results_json,
save_results_files,
_add_metadata_to_dataframe,
)


Expand Down Expand Up @@ -330,25 +331,55 @@ def test_save_results_files(mocker):
assert os_m.called_once_with(path, exist_ok=True)

assert save_parquet_mock.call_args_list == [
call(path, "default", "default_df"),
call(path, "step_counts", "step_counts_df"),
call(path, "default", "default_df", params),
call(path, "step_counts", "step_counts_df", params),
]

assert save_params_mock.called_once_with(path, params)


def test_save_parquet_file():
def test_save_parquet_file(mocker):
    """_save_parquet_file adds metadata, writes parquet, returns the filename."""
    # arrange
    frame_mock = Mock()
    params_mock = Mock()
    metadata_mock = mocker.patch(
        "model.results._add_metadata_to_dataframe", return_value=frame_mock
    )
    expected = "path/file.parquet"

    # act
    result = _save_parquet_file("path", "file", frame_mock, params_mock)

    # assert
    assert result == expected
    metadata_mock.assert_called_once_with(frame_mock, params_mock)
    frame_mock.to_parquet.assert_called_once_with(expected)


def test_add_metadata_to_dataframe():
    """_add_metadata_to_dataframe appends each metadata value as a column."""
    # NOTE: the unused `mocker` fixture parameter was removed — this test
    # patches nothing, so it should not request the pytest-mock fixture.
    # arrange
    df = pd.DataFrame({"one": [1], "two": [2]})
    params = {
        "dataset": "dataset",
        "scenario": "scenario",
        "app_version": "app_version",
        "create_datetime": "create_datetime",
    }
    expected = {
        "one": [1],
        "two": [2],
        "dataset": ["dataset"],
        "scenario": ["scenario"],
        "app_version": ["app_version"],
        "create_datetime": ["create_datetime"],
    }

    # act
    actual = _add_metadata_to_dataframe(df, params)

    # assert
    assert actual.to_dict("list") == expected


def test_save_params_file(mocker):
# arrange
j_mock = mocker.patch("json.dump")
Expand Down
Loading