Skip to content

Commit 2095f64

Browse files
Merge pull request PowerGridModel#1276 from PowerGridModel/feature/owning-dataset-in-cpp-api
Move owning dataset buffer creation logic into cpp api
2 parents 3f03f55 + 99cc8ff commit 2095f64

File tree

10 files changed

+222
-91
lines changed

10 files changed

+222
-91
lines changed

.vscode/settings.json

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
{
2-
"cmake.buildDirectory": "${workspaceFolder}/cpp_build/${buildType}",
32
"C_Cpp.autoAddFileAssociations": false,
43
"cmake.configureOnOpen": true,
5-
"cmake.generator": "Ninja",
6-
"cmake.configureSettings": {
7-
"POWER_GRID_MODEL_SANITIZER": 1
8-
},
94
"testMate.cpp.test.executables": "${command:cmake.buildDirectory}/bin/power_grid_model_*_tests*",
105
"testMate.cpp.debug.configTemplate": {
116
"type": "cppvsdbg",
@@ -21,6 +16,7 @@
2116
},
2217
"doxdocgen.generic.filteredKeywords": ["PGM_API"],
2318
"doxdocgen.generic.includeTypeAtReturn": false,
19+
"python.defaultInterpreterPath": "${workspaceFolder}/.venv",
2420
"python.testing.pytestArgs": ["tests", "--no-cov"],
2521
"python.testing.unittestEnabled": false,
2622
"python.testing.pytestEnabled": true,
@@ -31,6 +27,9 @@
3127
},
3228
"C_Cpp.formatting": "clangFormat",
3329
"C_Cpp.clang_format_fallbackStyle": "LLVM",
30+
"[python]": {
31+
"editor.defaultFormatter": "charliermarsh.ruff"
32+
},
3433
"[markdown]": {
3534
"editor.defaultFormatter": "DavidAnson.vscode-markdownlint",
3635
"editor.rulers": [120]

power_grid_model_c/power_grid_model_cpp/include/power_grid_model_cpp/buffer.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ class Buffer {
1717
Buffer(MetaComponent const* component, Idx size)
1818
: component_{component}, size_{size}, buffer_{handle_.call_with(PGM_create_buffer, component, size)} {};
1919

20+
// Construct an empty buffer: no component metadata, a size of zero and a null underlying buffer pointer.
Buffer() : component_{nullptr}, size_{0}, buffer_{nullptr} {};
21+
2022
RawDataConstPtr get() const { return buffer_.get(); }
2123
RawDataPtr get() { return buffer_.get(); }
2224

power_grid_model_c/power_grid_model_cpp/include/power_grid_model_cpp/dataset.hpp

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@
99
#include "basics.hpp"
1010
#include "buffer.hpp"
1111
#include "handle.hpp"
12+
#include "utils.hpp"
1213

1314
#include "power_grid_model_c/dataset.h"
1415

16+
#include <map>
17+
#include <set>
18+
#include <variant>
19+
1520
namespace power_grid_model_cpp {
1621
class ComponentTypeNotFound : public PowerGridError {
1722
public:
@@ -210,14 +215,139 @@ class DatasetConst {
210215
DatasetInfo info_;
211216
};
212217

218+
class AttributeBuffer {
219+
private:
220+
using VariantType = std::variant<std::monostate, std::vector<ID>, std::vector<IntS>, std::vector<double>,
221+
std::vector<std::array<double, 3>>>;
222+
223+
struct BufferCreator {
224+
Idx size;
225+
template <class T> VariantType operator()() const { return std::vector<T>(size, nan_value<T>()); }
226+
};
227+
228+
struct PtrGetter {
229+
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
230+
AttributeBuffer& buffer;
231+
template <class T> RawDataPtr operator()() const { return std::get<std::vector<T>>(buffer.buffer_).data(); }
232+
};
233+
234+
public:
235+
AttributeBuffer() = default;
236+
237+
explicit AttributeBuffer(MetaAttribute const* attribute, Idx size)
238+
: attribute_{attribute},
239+
buffer_{pgm_type_func_selector(MetaData::attribute_ctype(attribute), BufferCreator{size})} {}
240+
241+
RawDataPtr get() { return pgm_type_func_selector(MetaData::attribute_ctype(attribute_), PtrGetter{*this}); }
242+
243+
private:
244+
MetaAttribute const* attribute_{nullptr};
245+
VariantType buffer_;
246+
};
247+
213248
// Plain aggregate that keeps alive the memory referenced by a dataset.
// OwningDataset appends one entry to each of the three vectors per component it registers.
struct OwningMemory {
214249
// Row-based component buffers; a default-constructed (empty) Buffer is stored for components
// that are registered with attribute (columnar) buffers instead.
std::vector<Buffer> buffers;
215250
// Index pointer arrays; an entry is left empty when the component needs no indptr.
std::vector<std::vector<Idx>> indptrs;
251+
// Per-component list of attribute buffers; the inner list is empty for row-based components.
std::vector<std::vector<AttributeBuffer>> attribute_buffers;
216252
};
217253

218254
struct OwningDataset {
219255
DatasetMutable dataset;
220256
OwningMemory storage{};
257+
258+
OwningDataset(DatasetWritable& writable_dataset, bool enable_columnar_buffers = false)
259+
: dataset{writable_dataset.get_info().name(), writable_dataset.get_info().is_batch(),
260+
writable_dataset.get_info().batch_size()},
261+
storage{} {
262+
auto const& info = writable_dataset.get_info();
263+
Idx const batch_size = info.batch_size();
264+
auto const& dataset_name = info.name();
265+
266+
for (Idx component_idx{}; component_idx < info.n_components(); ++component_idx) {
267+
auto const& component_name = info.component_name(component_idx);
268+
auto const* const component_meta = MetaData::get_component_by_name(dataset_name, component_name);
269+
Idx const component_size = info.component_total_elements(component_idx);
270+
Idx const elements_per_scenario = info.component_elements_per_scenario(component_idx);
271+
272+
auto& current_indptr = storage.indptrs.emplace_back(elements_per_scenario < 0 ? batch_size + 1 : 0);
273+
if (!current_indptr.empty()) {
274+
current_indptr.at(0) = 0;
275+
current_indptr.at(batch_size) = component_size;
276+
}
277+
Idx* const indptr = current_indptr.empty() ? nullptr : current_indptr.data();
278+
if (info.has_attribute_indications(component_idx) && enable_columnar_buffers) {
279+
auto& current_buffer = storage.buffers.emplace_back();
280+
writable_dataset.set_buffer(component_name, indptr, current_buffer);
281+
dataset.add_buffer(component_name, elements_per_scenario, component_size, indptr, current_buffer);
282+
auto const& attribute_indications = info.attribute_indications(component_idx);
283+
auto& current_attribute_buffers = storage.attribute_buffers.emplace_back();
284+
for (auto const& attribute_name : attribute_indications) {
285+
auto const* const attribute_meta =
286+
MetaData::get_attribute_by_name(dataset_name, component_name, attribute_name);
287+
current_attribute_buffers.emplace_back(attribute_meta, component_size);
288+
writable_dataset.set_attribute_buffer(component_name, attribute_name,
289+
current_attribute_buffers.back().get());
290+
dataset.add_attribute_buffer(component_name, attribute_name,
291+
current_attribute_buffers.back().get());
292+
}
293+
} else {
294+
auto& current_buffer = storage.buffers.emplace_back(component_meta, component_size);
295+
storage.attribute_buffers.emplace_back(); // empty attribute buffers
296+
writable_dataset.set_buffer(component_name, indptr, current_buffer);
297+
dataset.add_buffer(component_name, elements_per_scenario, component_size, indptr, current_buffer);
298+
}
299+
}
300+
}
301+
302+
OwningDataset(
303+
OwningDataset const& ref_dataset, std::string const& dataset_name, bool is_batch = false, Idx batch_size = 1,
304+
std::map<MetaComponent const*, std::set<MetaAttribute const*>> const& output_component_attribute_filters = {})
305+
: dataset{dataset_name, is_batch, batch_size}, storage{} {
306+
DatasetInfo const& ref_info = ref_dataset.dataset.get_info();
307+
bool const enable_filters = !output_component_attribute_filters.empty();
308+
309+
for (Idx component_idx{}; component_idx != ref_info.n_components(); ++component_idx) {
310+
auto const& component_name = ref_info.component_name(component_idx);
311+
auto const& component_meta = MetaData::get_component_by_name(dataset_name, component_name);
312+
// skip components not in the filter
313+
if (enable_filters &&
314+
output_component_attribute_filters.find(component_meta) == output_component_attribute_filters.end()) {
315+
continue;
316+
}
317+
318+
// get size info from reference dataset
319+
Idx const component_elements_per_scenario = ref_info.component_elements_per_scenario(component_idx);
320+
if (component_elements_per_scenario < 0) {
321+
throw PowerGridError{"Cannot create result dataset for component with variable size per scenario"};
322+
}
323+
Idx const component_size = component_elements_per_scenario * batch_size;
324+
storage.indptrs.emplace_back();
325+
326+
auto const component_filter_it = output_component_attribute_filters.find(component_meta);
327+
std::set<MetaAttribute const*> const& attribute_filter =
328+
component_filter_it != output_component_attribute_filters.end() ? component_filter_it->second
329+
: std::set<MetaAttribute const*>{};
330+
if (attribute_filter.empty()) {
331+
// create full row buffer
332+
auto& component_buffer = storage.buffers.emplace_back(component_meta, component_size);
333+
storage.attribute_buffers.emplace_back(); // empty attribute buffers
334+
dataset.add_buffer(component_name, component_elements_per_scenario, component_size, nullptr,
335+
component_buffer);
336+
} else {
337+
// push nullptr as row buffer, and start attribute buffers
338+
auto& component_buffer = storage.buffers.emplace_back();
339+
storage.attribute_buffers.emplace_back();
340+
dataset.add_buffer(component_name, component_elements_per_scenario, component_size, nullptr,
341+
component_buffer);
342+
for (auto const* const attribute_meta : attribute_filter) {
343+
auto const attribute_name = MetaData::attribute_name(attribute_meta);
344+
auto& attribute_buffer =
345+
storage.attribute_buffers.back().emplace_back(attribute_meta, component_size);
346+
dataset.add_attribute_buffer(component_name, attribute_name, attribute_buffer.get());
347+
}
348+
}
349+
}
350+
}
221351
};
222352
} // namespace power_grid_model_cpp
223353

power_grid_model_c/power_grid_model_cpp/include/power_grid_model_cpp/serialization.hpp

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "power_grid_model_c/serialization.h"
1515

1616
#include <cstring>
17+
#include <filesystem>
18+
#include <fstream>
1719

1820
namespace power_grid_model_cpp {
1921
class Deserializer {
@@ -83,42 +85,52 @@ class Serializer {
8385
}
8486

8587
std::string get_to_zero_terminated_string(Idx use_compact_list, Idx indent) {
86-
return std::string{
87-
handle_.call_with(PGM_serializer_get_to_zero_terminated_string, get(), use_compact_list, indent)};
88+
return std::string{get_to_zero_terminated_c_string(use_compact_list, indent)};
89+
}
90+
91+
// Serialize and return the result as the raw zero-terminated C string obtained from
// PGM_serializer_get_to_zero_terminated_string, without copying it into a std::string.
// NOTE(review): the returned pointer presumably refers to memory owned by the underlying serializer,
// so it should not outlive this object or a subsequent serialization call - confirm against the C API.
char const* get_to_zero_terminated_c_string(Idx use_compact_list, Idx indent) {
92+
return handle_.call_with(PGM_serializer_get_to_zero_terminated_string, get(), use_compact_list, indent);
8893
}
8994

9095
private:
9196
power_grid_model_cpp::Handle handle_{};
9297
detail::UniquePtr<RawSerializer, &PGM_destroy_serializer> serializer_;
9398
};
9499

95-
inline OwningDataset create_owning_dataset(DatasetWritable& writable_dataset) {
96-
auto const& info = writable_dataset.get_info();
97-
bool const is_batch = info.is_batch();
98-
Idx const batch_size = info.batch_size();
99-
auto const& dataset_name = info.name();
100-
DatasetMutable dataset_mutable{dataset_name, is_batch, batch_size};
101-
OwningMemory storage{};
102-
103-
for (Idx component_idx{}; component_idx < info.n_components(); ++component_idx) {
104-
auto const& component_name = info.component_name(component_idx);
105-
auto const& component_meta = MetaData::get_component_by_name(dataset_name, component_name);
106-
Idx const component_size = info.component_total_elements(component_idx);
107-
Idx const elements_per_scenario = info.component_elements_per_scenario(component_idx);
108-
109-
auto& current_indptr = storage.indptrs.emplace_back(elements_per_scenario < 0 ? batch_size + 1 : 0);
110-
if (!current_indptr.empty()) {
111-
current_indptr.at(0) = 0;
112-
current_indptr.at(batch_size) = component_size;
100+
/// Read a serialized dataset from `path` and deserialize it into a dataset that owns its buffers.
///
/// @param path                    file to read; it is opened in binary mode and read completely
/// @param serialization_format    format of the file content
/// @param enable_columnar_buffers forwarded to the OwningDataset constructor
/// @return the deserialized dataset together with its storage
/// @throws std::runtime_error if the file cannot be opened, its size cannot be determined, or it cannot
///         be read completely
inline OwningDataset load_dataset(std::filesystem::path const& path, PGM_SerializationFormat serialization_format,
                                  bool enable_columnar_buffers = false) {
    auto read_file = [](std::filesystem::path const& read_file_path) {
        // open at the end so that the current position equals the file size
        std::ifstream f{read_file_path, std::ios::binary | std::ios::ate};
        if (!f) {
            throw std::runtime_error("Failed to open file: " + read_file_path.string());
        }
        // tellg() reports -1 on failure; casting that to size_t would request a huge allocation
        auto const file_size = static_cast<std::streamoff>(f.tellg());
        if (file_size < 0) {
            throw std::runtime_error("Failed to determine size of file: " + read_file_path.string());
        }
        f.seekg(0, std::ios::beg);
        std::vector<char> buffer(static_cast<size_t>(file_size));
        f.read(buffer.data(), static_cast<std::streamsize>(file_size));
        if (!f) {
            // a short read would otherwise hand partially filled data to the deserializer
            throw std::runtime_error("Failed to read file: " + read_file_path.string());
        }
        return buffer;
    };

    Deserializer deserializer{read_file(path), serialization_format};
    auto& writable_dataset = deserializer.get_dataset();
    // the buffers must be registered in the writable dataset before parsing into them
    OwningDataset dataset{writable_dataset, enable_columnar_buffers};
    deserializer.parse_to_buffer();
    return dataset;
}
120+
121+
inline void save_dataset(std::filesystem::path const& path, DatasetConst const& dataset,
122+
PGM_SerializationFormat serialization_format, Idx use_compact_list, Idx indent = 2) {
123+
Serializer serializer{dataset, serialization_format};
124+
std::string_view const serialized_data = serialization_format == PGM_msgpack
125+
? serializer.get_to_binary_buffer(use_compact_list)
126+
: serializer.get_to_zero_terminated_c_string(use_compact_list, indent);
127+
if (std::ofstream f{path, std::ios::binary}; f.is_open()) {
128+
f << serialized_data;
129+
} else {
130+
throw std::runtime_error{"Failed to open file for writing: " + path.string()};
131+
};
132+
}
133+
122134
} // namespace power_grid_model_cpp
123135

124136
#endif // POWER_GRID_MODEL_CPP_SERIALIZATION_HPP

pyproject.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@ lint = [
4646
"clang-format ~= 18.0", # C++ formatter
4747
]
4848

49-
dev = [{ include-group = "test" }, { include-group = "lint" }]
49+
example = ["ipykernel"]
50+
51+
dev = [
52+
{ include-group = "test" },
53+
{ include-group = "lint" },
54+
{ include-group = "example" },
55+
]
5056

5157
code-generation = ["dataclasses_json", "jinja2"]
5258

tests/cpp_validation_tests/test_validation.cpp

Lines changed: 6 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -41,43 +41,6 @@ auto read_json(std::filesystem::path const& path) {
4141
return j;
4242
}
4343

44-
OwningDataset create_result_dataset(OwningDataset const& input, std::string const& dataset_name, bool is_batch = false,
45-
Idx batch_size = 1) {
46-
DatasetInfo const& input_info = input.dataset.get_info();
47-
48-
OwningDataset result{.dataset = DatasetMutable{dataset_name, is_batch, batch_size}, .storage{}};
49-
50-
for (Idx component_idx{}; component_idx != input_info.n_components(); ++component_idx) {
51-
auto const& component_name = input_info.component_name(component_idx);
52-
auto const& component_meta = MetaData::get_component_by_name(dataset_name, component_name);
53-
Idx const component_elements_per_scenario = input_info.component_elements_per_scenario(component_idx);
54-
Idx const component_size = input_info.component_total_elements(component_idx);
55-
56-
auto& current_indptr = result.storage.indptrs.emplace_back(
57-
input_info.component_elements_per_scenario(component_idx) < 0 ? batch_size + 1 : 0);
58-
Idx const* const indptr = current_indptr.empty() ? nullptr : current_indptr.data();
59-
auto& current_buffer = result.storage.buffers.emplace_back(component_meta, component_size);
60-
result.dataset.add_buffer(component_name, component_elements_per_scenario, component_size, indptr,
61-
current_buffer);
62-
}
63-
return result;
64-
}
65-
66-
OwningDataset load_dataset(std::filesystem::path const& path) {
67-
auto read_file = [](std::filesystem::path const& read_file_path) {
68-
std::ifstream const f{read_file_path};
69-
std::ostringstream buffer;
70-
buffer << f.rdbuf();
71-
return buffer.str();
72-
};
73-
74-
Deserializer deserializer{read_file(path), PGM_json};
75-
auto& writable_dataset = deserializer.get_dataset();
76-
auto dataset = create_owning_dataset(writable_dataset);
77-
deserializer.parse_to_buffer();
78-
return dataset;
79-
}
80-
8144
template <typename T> std::string get_as_string(T const& attribute_value) {
8245
std::stringstream sstr;
8346
sstr << std::setprecision(16);
@@ -571,17 +534,17 @@ struct ValidationCase {
571534
ValidationCase create_validation_case(CaseParam const& param, std::string const& output_type) {
572535
// input
573536
ValidationCase validation_case{.param = param,
574-
.input = load_dataset(param.case_dir / "input.json"),
537+
.input = load_dataset(param.case_dir / "input.json", PGM_json, true),
575538
.output = std::nullopt,
576539
.update_batch = std::nullopt,
577540
.output_batch = std::nullopt};
578541

579542
// output and update
580543
if (!param.is_batch) {
581-
validation_case.output = load_dataset(param.case_dir / (output_type + ".json"));
544+
validation_case.output = load_dataset(param.case_dir / (output_type + ".json"), PGM_json);
582545
} else {
583-
validation_case.update_batch = load_dataset(param.case_dir / "update_batch.json");
584-
validation_case.output_batch = load_dataset(param.case_dir / (output_type + "_batch.json"));
546+
validation_case.update_batch = load_dataset(param.case_dir / "update_batch.json", PGM_json);
547+
validation_case.output_batch = load_dataset(param.case_dir / (output_type + "_batch.json"), PGM_json);
585548
}
586549
return validation_case;
587550
}
@@ -643,7 +606,7 @@ void validate_single_case(CaseParam const& param) {
643606
execute_test(param, [&param](Subcase& subcase) {
644607
auto const output_prefix = get_output_type(param.calculation_type, param.sym);
645608
auto const validation_case = create_validation_case(param, output_prefix);
646-
auto const result = create_result_dataset(validation_case.output.value(), output_prefix);
609+
OwningDataset const result{validation_case.output.value(), output_prefix};
647610

648611
// create and run model
649612
auto const& options = get_options(param);
@@ -661,8 +624,7 @@ void validate_batch_case(CaseParam const& param) {
661624
auto const validation_case = create_validation_case(param, output_prefix);
662625
auto const& info = validation_case.update_batch.value().dataset.get_info();
663626
Idx const batch_size = info.batch_size();
664-
auto const batch_result =
665-
create_result_dataset(validation_case.output_batch.value(), output_prefix, true, batch_size);
627+
OwningDataset const batch_result{validation_case.output_batch.value(), output_prefix, true, batch_size};
666628

667629
// create model
668630
Model model{50.0, validation_case.input.dataset};
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<!--
2+
SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
3+
4+
SPDX-License-Identifier: MPL-2.0
5+
-->
6+
7+
# Simple test case: 1 primary substation (OS), 2 secondary substations (MSR)
8+
9+
The input data has some attribute indications so that the columnar deserializer is also tested.

0 commit comments

Comments
 (0)