Skip to content

Commit 86ec472

Browse files
Merge pull request #1099 from PowerGridModel/feature/improve-dataset-error-messages
Error messages: clearer message when dataset is inconsistent
2 parents 6fd86e4 + 89f6bb1 commit 86ec472

File tree

5 files changed

+119
-14
lines changed

5 files changed

+119
-14
lines changed

src/power_grid_model/_core/buffer_handling.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,15 @@ def _get_dense_buffer_properties(
200200
n_total_elements = actual_batch_size * n_elements_per_scenario
201201

202202
if is_batch is not None and is_batch != actual_is_batch:
203-
raise ValueError(f"Provided 'is batch' is incorrect for the provided data. {VALIDATOR_MSG}")
203+
raise ValueError(
204+
f"Incorrect/inconsistent data provided: {'batch' if actual_is_batch else 'single'} "
205+
f"data provided but {'batch' if is_batch else 'single'} data expected. {VALIDATOR_MSG}"
206+
)
204207
if batch_size is not None and batch_size != actual_batch_size:
205-
raise ValueError(f"Provided 'batch size' is incorrect for the provided data. {VALIDATOR_MSG}")
208+
raise ValueError(
209+
f"Incorrect/inconsistent batch size provided: {actual_batch_size} scenarios provided "
210+
f"but {batch_size} scenarios expected. {VALIDATOR_MSG}"
211+
)
206212

207213
return BufferProperties(
208214
is_sparse=is_sparse_property,

src/power_grid_model/_core/power_grid_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def _add_data(self, data: Dataset):
240240
data: the data.
241241
242242
Raises:
243-
ValueError: if the component is unknown and allow_unknown is False.
243+
ValueError: if the component is unknown.
244244
ValueError: if the data is inconsistent with the rest of the dataset.
245245
PowerGridError: if there was an internal error.
246246
"""
@@ -294,7 +294,7 @@ def _register_attribute_buffer(self, component, attr, attr_data):
294294
assert_no_error()
295295

296296
def _validate_properties(self, data: ComponentData, schema: ComponentMetaData):
297-
properties = get_buffer_properties(data, schema=schema, is_batch=self._is_batch, batch_size=self._batch_size)
297+
properties = get_buffer_properties(data, schema=schema, is_batch=None, batch_size=None)
298298
if properties.is_batch != self._is_batch:
299299
raise ValueError(
300300
f"Dataset type (single or batch) must be consistent across all components. {VALIDATOR_MSG}"

src/power_grid_model/_core/utils.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -728,8 +728,13 @@ def check_indptr_consistency(indptr: IndexPointer, batch_size: int | None, conte
728728
raise ValueError(f"indptr should start from zero and end at size of data array. {VALIDATOR_MSG}")
729729
if np.any(np.diff(indptr) < 0):
730730
raise ValueError(f"indptr should be increasing. {VALIDATOR_MSG}")
731-
if batch_size is not None and batch_size != indptr.size - 1:
732-
raise ValueError(f"Provided batch size must be equal to actual batch size. {VALIDATOR_MSG}")
731+
732+
actual_batch_size = indptr.size - 1
733+
if batch_size is not None and batch_size != actual_batch_size:
734+
raise ValueError(
735+
f"Incorrect/inconsistent batch size provided: {actual_batch_size} scenarios provided "
736+
f"but {batch_size} scenarios expected. {VALIDATOR_MSG}"
737+
)
733738

734739

735740
def get_dataset_type(data: Dataset) -> DatasetType:

tests/unit/test_buffer_handling.py

Lines changed: 85 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010
import pytest
1111

1212
from power_grid_model._core.buffer_handling import (
13-
_get_dense_buffer_properties,
1413
_get_raw_attribute_data_view,
15-
_get_sparse_buffer_properties,
14+
get_buffer_properties,
1615
get_buffer_view,
1716
)
1817
from power_grid_model._core.dataset_definitions import ComponentType, DatasetType
@@ -26,16 +25,19 @@
2625

2726
def load_data(component_type, is_batch, is_sparse, is_columnar):
2827
"""Creates load data of different formats for testing"""
29-
shape = (2, 4) if is_batch else (4,)
28+
shape = (BATCH_DATASET_NDIM, SCENARIO_TOTAL_ELEMENTS) if is_batch else (SCENARIO_TOTAL_ELEMENTS,)
3029
load = initialize_array(DatasetType.update, component_type, shape)
3130
columnar_names = ["p_specified", "q_specified"]
3231

3332
if is_columnar:
3433
if is_sparse:
35-
return {"indptr": np.array([0, 5, 8]), "data": {k: load.reshape(-1)[k] for k in columnar_names}}
34+
return {
35+
"indptr": np.array([0, 5, BATCH_TOTAL_ELEMENTS]),
36+
"data": {k: load.reshape(-1)[k] for k in columnar_names},
37+
}
3638
return {k: load[k] for k in columnar_names}
3739
if is_sparse:
38-
return {"indptr": np.array([0, 5, 8]), "data": load.reshape(-1)}
40+
return {"indptr": np.array([0, 5, BATCH_TOTAL_ELEMENTS]), "data": load.reshape(-1)}
3941
return load
4042

4143

@@ -56,7 +58,7 @@ def test__get_dense_buffer_properties(component_type, is_batch, is_columnar):
5658
data = load_data(component_type, is_batch=is_batch, is_columnar=is_columnar, is_sparse=False)
5759
schema = power_grid_meta_data[DatasetType.update][component_type]
5860
batch_size = BATCH_DATASET_NDIM if is_batch else None
59-
properties = _get_dense_buffer_properties(data, schema=schema, is_batch=is_batch, batch_size=batch_size)
61+
properties = get_buffer_properties(data, schema=schema, is_batch=is_batch, batch_size=batch_size)
6062

6163
assert not properties.is_sparse
6264
assert properties.is_batch == is_batch
@@ -82,7 +84,7 @@ def test__get_sparse_buffer_properties(component_type, is_columnar):
8284
data = load_data(component_type, is_batch=True, is_columnar=is_columnar, is_sparse=True)
8385

8486
schema = power_grid_meta_data[DatasetType.update][component_type]
85-
properties = _get_sparse_buffer_properties(data, schema=schema, batch_size=2)
87+
properties = get_buffer_properties(data, schema=schema, batch_size=2)
8688

8789
assert properties.is_sparse
8890
assert properties.is_batch
@@ -95,6 +97,82 @@ def test__get_sparse_buffer_properties(component_type, is_columnar):
9597
assert properties.columns is None
9698

9799

100+
@pytest.mark.parametrize(
101+
("component_type", "is_columnar"),
102+
[
103+
pytest.param(ComponentType.sym_load, True, id="sym_load-single-columnar"),
104+
pytest.param(ComponentType.sym_load, False, id="sym_load-single-row_based"),
105+
pytest.param(ComponentType.asym_load, True, id="asym_load-single-columnar"),
106+
pytest.param(ComponentType.asym_load, False, id="asym_load-single-row_based"),
107+
],
108+
)
109+
def test__get_buffer_properties__batch_requested_for_single_data(component_type, is_columnar):
110+
data = load_data(component_type, is_batch=False, is_columnar=is_columnar, is_sparse=False)
111+
schema = power_grid_meta_data[DatasetType.update][component_type]
112+
113+
with pytest.raises(
114+
ValueError, match="Incorrect/inconsistent data provided: single data provided but batch data expected."
115+
):
116+
get_buffer_properties(data, schema=schema, is_batch=True, batch_size=BATCH_DATASET_NDIM)
117+
118+
119+
@pytest.mark.parametrize(
120+
("component_type", "is_sparse", "is_columnar"),
121+
[
122+
pytest.param(ComponentType.sym_load, False, True, id="sym_load-batch-columnar"),
123+
pytest.param(ComponentType.sym_load, False, False, id="sym_load-batch-row_based"),
124+
pytest.param(ComponentType.asym_load, False, True, id="asym_load-batch-columnar"),
125+
pytest.param(ComponentType.asym_load, False, False, id="asym_load-batch-row_based"),
126+
pytest.param(ComponentType.sym_load, True, True, id="sym_load-columnar"),
127+
pytest.param(ComponentType.sym_load, True, False, id="sym_load-row_based"),
128+
pytest.param(ComponentType.asym_load, True, True, id="asym_load-columnar"),
129+
pytest.param(ComponentType.asym_load, True, False, id="asym_load-row_based"),
130+
],
131+
)
132+
def test__get_buffer_properties__single_requested_for_batch(component_type, is_sparse, is_columnar):
133+
data = load_data(component_type, is_batch=True, is_columnar=is_columnar, is_sparse=is_sparse)
134+
schema = power_grid_meta_data[DatasetType.update][component_type]
135+
136+
if is_sparse:
137+
with pytest.raises(ValueError, match="Sparse data must be batch data"):
138+
get_buffer_properties(data, schema=schema, is_batch=False, batch_size=None)
139+
else:
140+
with pytest.raises(
141+
ValueError, match="Incorrect/inconsistent data provided: batch data provided but single data expected."
142+
):
143+
get_buffer_properties(data, schema=schema, is_batch=False, batch_size=None)
144+
145+
146+
@pytest.mark.parametrize(
147+
("component_type", "is_sparse", "is_columnar"),
148+
[
149+
pytest.param(ComponentType.sym_load, False, True, id="sym_load-batch-columnar"),
150+
pytest.param(ComponentType.sym_load, False, False, id="sym_load-batch-row_based"),
151+
pytest.param(ComponentType.asym_load, False, True, id="asym_load-batch-columnar"),
152+
pytest.param(ComponentType.asym_load, False, False, id="asym_load-batch-row_based"),
153+
pytest.param(ComponentType.sym_load, True, True, id="sym_load-columnar"),
154+
pytest.param(ComponentType.sym_load, True, False, id="sym_load-row_based"),
155+
pytest.param(ComponentType.asym_load, True, True, id="asym_load-columnar"),
156+
pytest.param(ComponentType.asym_load, True, False, id="asym_load-row_based"),
157+
],
158+
)
159+
def test__get_buffer_properties__wrong_batch_size(component_type, is_sparse, is_columnar):
160+
data = load_data(component_type, is_batch=True, is_columnar=is_columnar, is_sparse=is_sparse)
161+
schema = power_grid_meta_data[DatasetType.update][component_type]
162+
163+
actual_batch_size = BATCH_DATASET_NDIM
164+
wrong_batch_size = actual_batch_size + 1
165+
166+
with pytest.raises(
167+
ValueError,
168+
match=(
169+
f"Incorrect/inconsistent batch size provided: {actual_batch_size} scenarios provided but "
170+
f"{wrong_batch_size} scenarios expected."
171+
),
172+
):
173+
get_buffer_properties(data, schema=schema, is_batch=True, batch_size=wrong_batch_size)
174+
175+
98176
@pytest.mark.parametrize(
99177
("component", "is_batch", "is_columnar", "is_sparse"),
100178
[

tests/unit/test_dataset.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,28 @@ def test_const_dataset__sparse_batch_data(dataset_type):
152152
}
153153

154154

155+
def test_const_dataset__mixed_batch_type(dataset_type):
156+
data = {
157+
ComponentType.node: np.zeros(shape=3, dtype=power_grid_meta_data[dataset_type][ComponentType.node]),
158+
ComponentType.line: np.zeros(shape=(2, 3), dtype=power_grid_meta_data[dataset_type][ComponentType.line]),
159+
}
160+
with pytest.raises(ValueError, match=r"Dataset type \(single or batch\) must be consistent across all components."):
161+
CConstDataset(data, dataset_type)
162+
163+
data = {
164+
ComponentType.node: np.zeros(shape=(2, 3), dtype=power_grid_meta_data[dataset_type][ComponentType.node]),
165+
ComponentType.line: np.zeros(shape=3, dtype=power_grid_meta_data[dataset_type][ComponentType.line]),
166+
}
167+
with pytest.raises(ValueError, match=r"Dataset type \(single or batch\) must be consistent across all components."):
168+
CConstDataset(data, dataset_type)
169+
170+
155171
def test_const_dataset__mixed_batch_size(dataset_type):
156172
data = {
157173
ComponentType.node: np.zeros(shape=(2, 3), dtype=power_grid_meta_data[dataset_type][ComponentType.node]),
158174
ComponentType.line: np.zeros(shape=(3, 3), dtype=power_grid_meta_data[dataset_type][ComponentType.line]),
159175
}
160-
with pytest.raises(ValueError, match="Provided 'batch size' is incorrect for the provided data."):
176+
with pytest.raises(ValueError, match="Dataset must have a consistent batch size across all components."):
161177
CConstDataset(data, dataset_type)
162178

163179

0 commit comments

Comments
 (0)