Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/power_grid_model_io/data_stores/excel_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,18 @@ def load(self) -> TabularData:

def lazy_sheet_loader(xls_file: pd.ExcelFile, xls_sheet_name: str):
    """Create a zero-argument callable that parses one sheet when invoked.

    Args:
        xls_file: The opened Excel file to read from.
        xls_sheet_name: Name of the sheet the returned loader will parse.

    Returns:
        A function without parameters that parses the sheet, runs the
        store's column post-processing steps on it and returns the result.
    """

    def sheet_loader():
        # Header-only pass (nrows=0): we only need the column labels here, so
        # no data rows are parsed. The sheet is fully parsed exactly once below.
        preview = xls_file.parse(xls_sheet_name, header=self._header_rows, nrows=0)

        # Force every column whose label contains "Name" to be read as str.
        # Labels may be tuples when several header rows are used; in that case
        # the first level is checked as well.
        # NOTE(review): presumably this keeps numeric-looking names from being
        # converted to numbers by the parser - confirm with the callers.
        dtype = {
            col: str
            for col in preview.columns
            if "Name" in str(col) or (isinstance(col, tuple) and "Name" in str(col[0]))
        }

        sheet_data = xls_file.parse(xls_sheet_name, header=self._header_rows, dtype=dtype)
        sheet_data = self._remove_unnamed_column_placeholders(data=sheet_data)
        sheet_data = self._handle_duplicate_columns(data=sheet_data, sheet_name=xls_sheet_name)
        sheet_data = self._process_uuid_columns(data=sheet_data, sheet_name=xls_sheet_name)
        sheet_data = self._update_column_names(data=sheet_data)

        return sheet_data

    return sheet_loader
Expand Down
56 changes: 55 additions & 1 deletion tests/unit/data_stores/test_vision_excel_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from pathlib import Path
from unittest.mock import MagicMock, mock_open, patch

import pandas as pd

from power_grid_model_io.data_stores.vision_excel_file_store import VisionExcelFileStore


Expand All @@ -19,4 +21,56 @@ def test_header_rows(mock_excel_file: MagicMock):
data["foo"]

# Assert
mock_excel_file.return_value.parse.assert_called_once_with("foo", header=[0, 1])
assert mock_excel_file.return_value.parse.call_count == 2


@patch("power_grid_model_io.data_stores.excel_file_store.pd.ExcelFile")
@patch("power_grid_model_io.data_stores.excel_file_store.Path.open", mock_open())
def test_name_column_dtype_conversion(mock_excel_file: MagicMock):
    """Columns with "Name" in their header are requested as ``str`` and returned as text."""
    # Arrange
    store = VisionExcelFileStore(file_path=Path("dummy.xlsx"))
    excel_file = mock_excel_file.return_value
    excel_file.sheet_names = ["test_sheet"]

    header_only = pd.DataFrame(columns=["Mock.Name", "Other.Column", "ID"])

    def as_text(value):
        # Whole numbers are rendered without a fractional part, anything else as-is.
        if float(value).is_integer():
            return str(int(value))
        return str(value)

    def fake_parse(*args, **kwargs):
        # The header-only preview request is answered with the empty frame.
        if kwargs.get("nrows") == 0:
            return header_only

        frame = pd.DataFrame(
            {
                "Mock.Name": [123456789, 987.654],
                "Other.Column": ["value1", "value2"],
                "ID": [1, 2],
                "ratio": [0.1, 0.2],
            }
        )

        # Emulate the parser honouring a per-column ``str`` dtype request.
        for column, requested in kwargs.get("dtype", {}).items():
            if requested is str and column in frame.columns:
                frame[column] = frame[column].apply(as_text)

        return frame

    excel_file.parse.side_effect = fake_parse

    # Act
    data = store.load()
    result_df = data["test_sheet"]

    # Assert: one header preview followed by one real read
    assert excel_file.parse.call_count == 2

    preview_call, data_call = excel_file.parse.call_args_list
    assert preview_call.kwargs["nrows"] == 0

    requested_dtypes = data_call.kwargs
    assert "dtype" in requested_dtypes
    assert "Mock.Name" in requested_dtypes["dtype"]
    assert requested_dtypes["dtype"]["Mock.Name"] is str

    name_column = result_df["Mock.Name"]
    assert name_column[0] == "123456789"  # Long int as string
    assert name_column[1] == "987.654"  # Float as string

    other_column = result_df["Other.Column"]
    assert other_column[0] == "value1"
    assert other_column[1] == "value2"

    id_column = result_df["ID"]
    assert id_column[0] == 1
    assert id_column[1] == 2

    ratio_column = result_df["ratio"]
    assert ratio_column[0] == 0.1
    assert ratio_column[1] == 0.2
Loading