diff --git a/CMakeLists.txt b/CMakeLists.txt index 367bc15e92..e8a60b7f6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -406,6 +406,7 @@ set(CORE_SOURCE src/auxiliary/Date.cpp src/auxiliary/Filesystem.cpp src/auxiliary/JSON.cpp + src/auxiliary/JSONMatcher.cpp src/auxiliary/Mpi.cpp src/backend/Attributable.cpp src/backend/BaseRecordComponent.cpp diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index ad0dccf623..123b0a58e0 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -287,3 +287,78 @@ Explanation of the single keys: In "template" mode, only the dataset metadata (type, extent and attributes) are stored and no chunks can be written or read (i.e. write/read operations will be skipped). * ``json.attribute.mode`` / ``toml.attribute.mode``: One of ``"long"`` (default in openPMD 1.*) or ``"short"`` (default in openPMD 2.* and generally in TOML). The long format explicitly encodes the attribute type in the dataset on disk, the short format only writes the actual attribute as a JSON/TOML value, requiring readers to recover the type. + +Dataset-specific configuration +------------------------------ + +Sometimes it is beneficial to set configuration options for specific datasets. +Most dataset-specific configuration options supported by the openPMD-api are additionally backend-specific, being format-specific serialization instructions such as compression or chunking. + +All dataset-specific and backend-specific configuration is specified under the key path ``<backend>.dataset``. +Without filtering by dataset name (see the ``select`` key below) this looks like: + +.. 
code-block:: json + + { + "adios2": { + "dataset": { + "operators": [] + } + }, + "hdf5": { + "dataset": { + "chunking": "auto" + } + } + } + +Dataset-specific configuration options can be configured in multiple ways: + +As part of the general JSON/TOML configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the simplest case, the dataset configuration is specified without any extra steps as part of the JSON/TOML configuration that is used to initialize the openPMD Series as part of the ``Series`` constructor. This does not allow specifying different configurations per dataset, but sets the default configuration for all datasets. + +As a separate JSON/TOML configuration during dataset initialization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +
+Similarly to the ``Series`` constructor, the ``Dataset`` constructor optionally receives a JSON/TOML configuration, used for setting options specifically only for those datasets initialized with this ``Dataset`` specification. The default given in the ``Series`` constructor will be overridden. + +This is the preferred way for configuring dataset-specific options that are *not* backend-specific (currently only ``{"resizable": true}``). + +By pattern-matching the dataset names +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The above approach has the disadvantage that it has to be supported explicitly at the level of the downstream application, e.g. a simulation or data reader. As an alternative, the backend-specific dataset configuration under ``<backend>.dataset`` can also be given as a list of alternatives that are matched against the dataset name in sequence, e.g. ``hdf5.dataset = [<pattern_1>, <pattern_2>, ...]``. + +Each such pattern ``<pattern>`` is a JSON object with key ``cfg`` and optional key ``select``: ``{"select": <select>, "cfg": <cfg>}``. + +In here, ``<select>`` is a regex or a list of regexes, of type egrep as defined by the `C++ standard library <https://en.cppreference.com/w/cpp/regex/syntax_option_type>`__. 
+``<cfg>`` is a configuration that will be forwarded as a "regular" dataset configuration to the backend. + +.. note:: + + To match lists of regular expressions ``select = [REGEX_1, REGEX_2, ..., REGEX_n]``, the list is internally transformed into a single regular expression ``($^)|(REGEX_1)|(REGEX_2)|...|(REGEX_n)``. + +In a configuration such as ``hdf5.dataset = [<pattern_1>, <pattern_2>, ...]``, the single patterns will be processed in top-down manner, selecting the first matching pattern found in the list. +The specified regexes will be matched against the openPMD dataset path either within the Iteration (e.g. ``meshes/E/x`` or ``particles/.*/position/.*``) or within the Series (e.g. ``/data/1/meshes/E/x`` or ``/data/.*/particles/.*/position/.*``), considering full matches only. + +.. note:: + + The dataset name is determined by the result of ``attributable.myPath().openPMDPath()`` where ``attributable`` is an object in the openPMD hierarchy. + +.. note:: + + To match against the path within the containing Iteration or within the containing Series, the specified regular expression is internally transformed into ``(/data/[0-9]+/)?(REGEX)`` where ``REGEX`` is the specified pattern, and then matched against the full dataset path. + +The **default configuration** is specified by omitting the ``select`` key. +Specifying more than one default is an error. +If no pattern matches a dataset, the default configuration is chosen if specified, or an empty JSON object ``{}`` otherwise. + +A full example: + +.. literalinclude:: openpmd_extended_config.toml + :language: toml + +.. 
literalinclude:: openpmd_extended_config.json + :language: json diff --git a/docs/source/details/openpmd_extended_config.json b/docs/source/details/openpmd_extended_config.json new file mode 100644 index 0000000000..b4aec29b93 --- /dev/null +++ b/docs/source/details/openpmd_extended_config.json @@ -0,0 +1,62 @@ +{ + "adios2": { + "engine": { + "parameters": { + "Profile": "On" + } + }, + "dataset": [ + { + "cfg": { + "operators": [ + { + "type": "blosc", + "parameters": { + "clevel": "1", + "doshuffle": "BLOSC_BITSHUFFLE" + } + } + ] + } + }, + { + "select": [ + ".*positionOffset.*", + ".*particlePatches.*" + ], + "cfg": { + "operators": [] + } + } + ] + }, + "hdf5": { + "independent_stores": false, + "dataset": [ + { + "cfg": { + "chunks": "auto" + } + }, + { + "select": [ + "/data/1/particles/e/.*", + "/data/2/particles/e/.*" + ], + "cfg": { + "chunks": [ + 5 + ] + } + }, + { + "select": "particles/e/.*", + "cfg": { + "chunks": [ + 10 + ] + } + } + ] + } +} diff --git a/docs/source/details/openpmd_extended_config.toml b/docs/source/details/openpmd_extended_config.toml new file mode 100644 index 0000000000..5bffc4ae63 --- /dev/null +++ b/docs/source/details/openpmd_extended_config.toml @@ -0,0 +1,44 @@ + +# ADIOS2 config + +[adios2.engine.parameters] +Profile = "On" + +# default configuration +[[adios2.dataset]] +# nested list as ADIOS2 can add multiple operators to a single dataset +[[adios2.dataset.cfg.operators]] +type = "blosc" +parameters.doshuffle = "BLOSC_BITSHUFFLE" +parameters.clevel = "1" + +# dataset-specific configuration to exclude some datasets +# from applying operators. +[[adios2.dataset]] +select = [".*positionOffset.*", ".*particlePatches.*"] +cfg.operators = [] + +# Now HDF5 + +[hdf5] +independent_stores = false + +# default configuration +# The position of the default configuration does not matter, but there must +# be only one single default configuration. 
+[[hdf5.dataset]] +cfg.chunks = "auto" + +# Dataset-specific configuration that specifies full paths, +# i.e. including the path to the Iteration. +# The non-default configurations are matched in top-down order, +# so the order is relevant. +[[hdf5.dataset]] +select = ["/data/1/particles/e/.*", "/data/2/particles/e/.*"] +cfg.chunks = [5] + +# dataset-specific configuration that specifies only the path +# within the Iteration +[[hdf5.dataset]] +select = "particles/e/.*" +cfg.chunks = [10] diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index b480cb2f00..10a5bde40f 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ b/examples/13_write_dynamic_configuration.cpp @@ -10,13 +10,16 @@ using namespace openPMD; int main() { - if (!getVariants()["adios2"]) + if (!getVariants()["hdf5"]) { // Example configuration below selects the ADIOS2 backend return 0; } using position_t = double; + +// see https://github.com/ToruNiina/toml11/issues/205 +#if !defined(__NVCOMPILER_MAJOR__) || __NVCOMPILER_MAJOR__ >= 23 /* * This example demonstrates how to use JSON/TOML-based dynamic * configuration for openPMD. @@ -34,7 +37,7 @@ int main() # be passed by adding an at-sign `@` in front of the path # The format will then be recognized by filename extension, i.e. .json or .toml -backend = "adios2" +backend = "hdf5" iteration_encoding = "group_based" # The following is only relevant in read mode defer_iteration_parsing = true @@ -57,13 +60,104 @@ parameters.clevel = 5 # type = "some other parameter" # # ... -[hdf5.dataset] -chunks = "auto" +# Sometimes, dataset configurations should not affect all datasets, but only +# specific ones, e.g. only particle data. +# Dataset configurations can be given as a list, here at the example of HDF5. +# In such lists, each entry is an object with two keys: +# +# 1. 'cfg': Mandatory key, this is the actual dataset configuration. +# 2. 
'select': A Regex or a list of Regexes to match against the dataset name. +# +# This makes it possible to give dataset-specific configurations. +# The dataset name is the same as returned +# by `Attributable::myPath().openPMDPath()`. +# The regex must match against either the full path (e.g. "/data/1/meshes/E/x") +# or against the path within the iteration (e.g. "meshes/E/x"). + +# Example: +# Let HDF5 datasets be automatically chunked by default +[[hdf5.dataset]] +cfg.chunks = "auto" + +# For particles, we can specify the chunking explicitly +[[hdf5.dataset]] +# Multiple selection regexes can be given as a list. +# They will be fused into a single regex '($^)|(regex1)|(regex2)|(regex3)|...'. +select = ["/data/1/particles/e/.*", "/data/2/particles/e/.*"] +cfg.chunks = [5] + +# Selecting a match works top-down, the order of list entries is important. +[[hdf5.dataset]] +# Specifying only a single regex. +# The regex can match against the full dataset path +# or against the path within the Iteration. +# Capitalization is irrelevant. +select = "particles/e/.*" +CFG.CHUNKS = [10] )END"; +#else + /* + * This is the same configuration in JSON. We need this in deprecated + * NVHPC-compilers due to problems that those compilers have with the + * toruniina::toml11 library. 
+ */ + std::string const defaults = R"( +{ + "backend": "hdf5", + "defer_iteration_parsing": true, + "iteration_encoding": "group_based", + + "adios2": { + "engine": { + "type": "bp4" + }, + "dataset": { + "operators": [ + { + "parameters": { + "clevel": 5 + }, + "type": "zlib" + } + ] + } + }, + + "hdf5": { + "dataset": [ + { + "cfg": { + "chunks": "auto" + } + }, + { + "select": [ + "/data/1/particles/e/.*", + "/data/2/particles/e/.*" + ], + "cfg": { + "chunks": [ + 5 + ] + } + }, + { + "select": "particles/e/.*", + "CFG": { + "CHUNKS": [ + 10 + ] + } + } + ] + } +} +)"; +#endif // open file for writing Series series = - Series("../samples/dynamicConfig.bp", Access::CREATE, defaults); + Series("../samples/dynamicConfig.h5", Access::CREATE, defaults); Datatype datatype = determineDatatype(); constexpr unsigned long length = 10ul; @@ -93,18 +187,14 @@ chunks = "auto" /* * We want different compression settings for this dataset, so we pass - * a dataset-specific configuration. + * a dataset-specific configuration. This will override any definition + * specified above. * Also showcase how to define an resizable dataset. * This time in JSON. 
*/ std::string const differentCompressionSettings = R"END( { "resizable": true, - "adios1": { - "dataset": { - "transform": "blosc:compressor=zlib,shuffle=bit,lvl=1;nometa" - } - }, "adios2": { "dataset": { "operators": [ diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index da63b1196a..9dee72e02e 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -122,7 +122,6 @@ class ADIOS2IOHandlerImpl ADIOS2IOHandlerImpl( AbstractIOHandler *, MPI_Comm, - json::TracingJSON config, std::string engineType, std::string specifiedExtension); @@ -130,7 +129,6 @@ class ADIOS2IOHandlerImpl explicit ADIOS2IOHandlerImpl( AbstractIOHandler *, - json::TracingJSON config, std::string engineType, std::string specifiedExtension); @@ -345,6 +343,10 @@ class ADIOS2IOHandlerImpl // use m_config std::optional> getOperators(); + template + std::vector getDatasetOperators( + Parameter const &, Writable *, std::string const &varName); + std::string fileSuffix(bool verbose = true) const; /* @@ -587,7 +589,9 @@ namespace detail InvalidatableFile const &, std::string const &varName, Parameter ¶meters, - std::optional stepSelection); + std::optional stepSelection, + std::vector const + &operators); static constexpr char const *errorMsg = "ADIOS2: openDataset()"; }; @@ -858,6 +862,7 @@ class ADIOS2IOHandler : public AbstractIOHandler #if openPMD_HAVE_MPI ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access, MPI_Comm, @@ -868,6 +873,7 @@ class ADIOS2IOHandler : public AbstractIOHandler #endif ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access, json::TracingJSON options, diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index 291bc405a2..29b3de8bff 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -40,6 +40,11 @@ namespace openPMD { +namespace 
json +{ + class JsonMatcher; +} + /** * @brief Determine what items should be flushed upon Series::flush() * @@ -202,27 +207,47 @@ class AbstractIOHandler friend class Series; friend class ADIOS2IOHandlerImpl; friend class JSONIOHandlerImpl; + friend class HDF5IOHandlerImpl; friend class detail::ADIOS2File; private: void setIterationEncoding(IterationEncoding encoding); +protected: + // Needs to be a pointer due to include structure, this header is + // transitively included in user code, but we don't reexport the JSON + // library + std::unique_ptr jsonMatcher; + public: #if openPMD_HAVE_MPI - AbstractIOHandler(std::string path, Access at, MPI_Comm) - : directory{std::move(path)}, m_backendAccess{at}, m_frontendAccess{at} - {} + template + AbstractIOHandler( + std::optional> initialize_from, + std::string path, + Access at, + TracingJSON &&jsonConfig, + MPI_Comm); #endif - AbstractIOHandler(std::string path, Access at) - : directory{std::move(path)}, m_backendAccess{at}, m_frontendAccess{at} - {} - virtual ~AbstractIOHandler() = default; - AbstractIOHandler(AbstractIOHandler const &) = default; - AbstractIOHandler(AbstractIOHandler &&) = default; + template + AbstractIOHandler( + std::optional> initialize_from, + std::string path, + Access at, + TracingJSON &&jsonConfig); + + AbstractIOHandler(std::optional>); + + virtual ~AbstractIOHandler(); + + AbstractIOHandler(AbstractIOHandler const &) = delete; + // std::queue::queue(queue&&) is not noexcept + // NOLINTNEXTLINE(performance-noexcept-move-constructor) + AbstractIOHandler(AbstractIOHandler &&) noexcept(false); - AbstractIOHandler &operator=(AbstractIOHandler const &) = default; - AbstractIOHandler &operator=(AbstractIOHandler &&) = default; + AbstractIOHandler &operator=(AbstractIOHandler const &) = delete; + AbstractIOHandler &operator=(AbstractIOHandler &&) noexcept; /** Add provided task to queue according to FIFO. 
* diff --git a/include/openPMD/IO/AbstractIOHandlerHelper.hpp b/include/openPMD/IO/AbstractIOHandlerHelper.hpp index a5ce7a39be..5b39a288b3 100644 --- a/include/openPMD/IO/AbstractIOHandlerHelper.hpp +++ b/include/openPMD/IO/AbstractIOHandlerHelper.hpp @@ -30,6 +30,9 @@ namespace openPMD /** Construct an appropriate specific IOHandler for the desired IO mode that may be MPI-aware. * + * @param initialize_from Optionally initialize the IOHandler from a previous + * interim IOHandler which to replace with the handler now + * being initialized. * @param path Path to root folder for all operations associated with the desired handler. * @param access Access mode describing desired operations and @@ -47,6 +50,7 @@ namespace openPMD */ template std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, @@ -58,6 +62,9 @@ std::unique_ptr createIOHandler( /** Construct an appropriate specific IOHandler for the desired IO mode. * + * @param initialize_from Optionally initialize the IOHandler from a previous + * interim IOHandler which to replace with the handler now + * being initialized. * @param path Path to root folder for all operations associated with * the desired handler. 
* @param access Access describing desired operations and permissions @@ -74,6 +81,7 @@ std::unique_ptr createIOHandler( */ template std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, @@ -83,6 +91,7 @@ std::unique_ptr createIOHandler( // version without configuration to use in AuxiliaryTest std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, diff --git a/include/openPMD/IO/HDF5/HDF5IOHandler.hpp b/include/openPMD/IO/HDF5/HDF5IOHandler.hpp index e81996b389..e661edbc2d 100644 --- a/include/openPMD/IO/HDF5/HDF5IOHandler.hpp +++ b/include/openPMD/IO/HDF5/HDF5IOHandler.hpp @@ -34,7 +34,11 @@ class HDF5IOHandlerImpl; class HDF5IOHandler : public AbstractIOHandler { public: - HDF5IOHandler(std::string path, Access, json::TracingJSON config); + HDF5IOHandler( + std::optional> initialize_from, + std::string path, + Access, + json::TracingJSON config); ~HDF5IOHandler() override; std::string backendName() const override diff --git a/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp b/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp index e4efc06ea6..a681217adf 100644 --- a/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp +++ b/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp @@ -42,10 +42,7 @@ class HDF5IOHandlerImpl : public AbstractIOHandlerImpl friend class ParallelHDF5IOHandler; public: - HDF5IOHandlerImpl( - AbstractIOHandler *, - json::TracingJSON config, - bool do_warn_unused_params = true); + HDF5IOHandlerImpl(AbstractIOHandler *, bool do_warn_unused_params = true); ~HDF5IOHandlerImpl() override; void diff --git a/include/openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp b/include/openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp index cd951be5d2..66518b5d0c 100644 --- a/include/openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp +++ b/include/openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp @@ -37,10 +37,17 @@ class ParallelHDF5IOHandler : public AbstractIOHandler public: #if 
openPMD_HAVE_MPI ParallelHDF5IOHandler( - std::string path, Access, MPI_Comm, json::TracingJSON config); + std::optional> initialize_from, + std::string path, + Access, + MPI_Comm, + json::TracingJSON config); #else ParallelHDF5IOHandler( - std::string const &path, Access, json::TracingJSON config); + std::optional> initialize_from, + std::string const &path, + Access, + json::TracingJSON config); #endif ~ParallelHDF5IOHandler() override; diff --git a/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp b/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp index 3b214b64cb..5f0570d217 100644 --- a/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp +++ b/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp @@ -37,8 +37,7 @@ namespace openPMD class ParallelHDF5IOHandlerImpl : public HDF5IOHandlerImpl { public: - ParallelHDF5IOHandlerImpl( - AbstractIOHandler *, MPI_Comm, json::TracingJSON config); + ParallelHDF5IOHandlerImpl(AbstractIOHandler *, MPI_Comm); ~ParallelHDF5IOHandlerImpl() override; MPI_Comm m_mpiComm; diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index fafe9c669b..e637df2ede 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -43,6 +43,10 @@ namespace openPMD { class Attributable; class Writable; +namespace json +{ + class JsonMatcher; +} Writable *getWritable(Attributable *); @@ -100,6 +104,18 @@ struct OPENPMDAPI_EXPORT AbstractParameter virtual std::unique_ptr to_heap() && = 0; + /** Warn about unused JSON paramters + * + * Template parameter so we don't have to include the JSON lib here. + * This function is useful for the createDataset() methods in, + * IOHandlerImpl's, so putting that here is the simplest way to make it + * available for them. 
*/ + template + static void warnUnusedParameters( + TracingJSON &, + std::string const ¤tBackendName, + std::string const &warningMessage); + protected: // avoid object slicing // by allow only child classes to use these things for defining their own @@ -361,17 +377,11 @@ struct OPENPMDAPI_EXPORT Parameter std::string options = "{}"; std::optional joinedDimension; - /** Warn about unused JSON paramters - * - * Template parameter so we don't have to include the JSON lib here. - * This function is useful for the createDataset() methods in, - * IOHandlerImpl's, so putting that here is the simplest way to make it - * available for them. */ template - static void warnUnusedParameters( - TracingJSON &, - std::string const ¤tBackendName, - std::string const &warningMessage); + TracingJSON compileJSONConfig( + Writable const *writable, + json::JsonMatcher &, + std::string const &backendName) const; }; template <> @@ -409,6 +419,12 @@ struct OPENPMDAPI_EXPORT Parameter new Parameter(std::move(*this))); } + template + static TracingJSON compileJSONConfig( + Writable const *writable, + json::JsonMatcher &, + std::string const &backendName); + std::string name = ""; std::shared_ptr dtype = std::make_shared(); std::shared_ptr extent = std::make_shared(); diff --git a/include/openPMD/IO/JSON/JSONIOHandler.hpp b/include/openPMD/IO/JSON/JSONIOHandler.hpp index e22fdb93d1..e3f9b24bac 100644 --- a/include/openPMD/IO/JSON/JSONIOHandler.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandler.hpp @@ -35,6 +35,7 @@ class JSONIOHandler : public AbstractIOHandler { public: JSONIOHandler( + std::optional> initialize_from, std::string path, Access at, openPMD::json::TracingJSON config, @@ -42,6 +43,7 @@ class JSONIOHandler : public AbstractIOHandler std::string originalExtension); #if openPMD_HAVE_MPI JSONIOHandler( + std::optional> initialize_from, std::string path, Access at, MPI_Comm, diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 
38966e3b82..9457abb89f 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -166,16 +166,12 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl }; explicit JSONIOHandlerImpl( - AbstractIOHandler *, - openPMD::json::TracingJSON config, - FileFormat, - std::string originalExtension); + AbstractIOHandler *, FileFormat, std::string originalExtension); #if openPMD_HAVE_MPI JSONIOHandlerImpl( AbstractIOHandler *, MPI_Comm, - openPMD::json::TracingJSON config, FileFormat, std::string originalExtension); #endif diff --git a/include/openPMD/auxiliary/JSON.hpp b/include/openPMD/auxiliary/JSON.hpp index 8c2551fe0a..26aa52281c 100644 --- a/include/openPMD/auxiliary/JSON.hpp +++ b/include/openPMD/auxiliary/JSON.hpp @@ -21,6 +21,12 @@ #pragma once +#include "openPMD/config.hpp" + +#if openPMD_HAVE_MPI +#include +#endif + #include namespace openPMD @@ -53,13 +59,62 @@ namespace json * users to overwrite default options, while keeping any other ones. * * @param defaultValue A string containing either a JSON or a TOML dataset. + * If the string begins with an `@`, the JSON/TOML dataset will be + * read from the filesystem at the specified path. * @param overwrite A string containing either a JSON or TOML dataset (does - * not need to be the same as `defaultValue`). + * not need to be the same as `defaultValue`). + * If the string begins with an `@`, the JSON/TOML dataset will be + * read from the filesystem at the specified path. * @return std::string The merged dataset, according to the above rules. If - * `defaultValue` was a JSON dataset, then as a JSON string, otherwise as a - * TOML string. + * `overwrite` was a JSON dataset, then as a JSON string, otherwise + * as a TOML string. */ std::string merge(std::string const &defaultValue, std::string const &overwrite); + +#if openPMD_HAVE_MPI + /** + * @brief Merge two JSON/TOML datasets into one. + * + * Merging rules: + * 1. 
If both `defaultValue` and `overwrite` are JSON/TOML objects, then the + * resulting JSON/TOML object will contain the union of both objects' + * keys. If a key is specified in both objects, the values corresponding + * to the key are merged recursively. Keys that point to a null value + * after this procedure will be pruned. + * 2. In any other case, the JSON/TOML dataset `defaultValue` is replaced in + * its entirety with the JSON/TOML dataset `overwrite`. + * + * Note that item 2 means that datasets of different type will replace each + * other without error. + * It also means that array types will replace each other without any notion + * of appending or merging. + * + * Possible use case: + * An application uses openPMD-api and wants to do the following: + * 1. Set some default backend options as JSON/TOML parameters. + * 2. Let its users specify custom backend options additionally. + * + * By using the json::merge() function, this application can then allow + * users to overwrite default options, while keeping any other ones. + * + * @param defaultValue A string containing either a JSON or a TOML dataset. + * If the string begins with an `@`, the JSON/TOML dataset will be + * read in parallel (using the MPI Communicator) + * from the filesystem at the specified path. + * @param overwrite A string containing either a JSON or TOML dataset (does + * not need to be the same as `defaultValue`). + * If the string begins with an `@`, the JSON/TOML dataset will be + * read in parallel (using the MPI Communicator) + * from the filesystem at the specified path. + * @return std::string The merged dataset, according to the above rules. If + * `overwrite` was a JSON dataset, then as a JSON string, otherwise + * as a TOML string. 
+ */ + std::string merge( + std::string const &defaultValue, + std::string const &overwrite, + MPI_Comm); +#endif } // namespace json } // namespace openPMD diff --git a/include/openPMD/auxiliary/JSONMatcher.hpp b/include/openPMD/auxiliary/JSONMatcher.hpp new file mode 100644 index 0000000000..e89ce19dfd --- /dev/null +++ b/include/openPMD/auxiliary/JSONMatcher.hpp @@ -0,0 +1,140 @@ +#pragma once + +/* Copyright 2021-2024 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see . + */ + +#include "openPMD/auxiliary/JSON_internal.hpp" + +#include +#include +#include + +namespace openPMD::json +{ +struct Pattern +{ + std::regex pattern; + nlohmann::json config; + + Pattern(std::string const &pattern_in, nlohmann::json config_in); +}; + +/** + * @brief Matcher for dataset configurations per backend. + * + */ +class MatcherPerBackend +{ +private: + std::vector m_patterns; + + void init(TracingJSON config); + +public: + /** + * @brief For default construction. + */ + explicit MatcherPerBackend(); + + /** + * @brief Initialize one backend's JSON matcher from its configuration. + * + * This constructor will parse the given config. 
+ * It will distinguish between ordinary openPMD JSON configurations + * and dataset-specific configurations. + * + * @param backendName The backend's JSON key. + * @param config The JSON configuration for one backend. + * E.g. for ADIOS2, this will be the sub-object/array found + * under config["adios2"]["dataset"]. + */ + MatcherPerBackend(std::string backendName, TracingJSON config); + + std::string backendName; + + /** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. + * @return The matched JSON configuration, as a string. + */ + auto get(std::string const &datasetPath) const -> nlohmann::json const &; +}; +/** + * @brief Class to handle default and dataset-specific JSON configurations. + * + * This class handles parsing of the extended JSON patterns as well as + * selection of one JSON configuration by regex. + * + */ +class JsonMatcher +{ +private: + // Only one backend matcher is initialized lazily upon calling + // JsonMatcher::get() + // Usually only one backend is active, so initializing all of them + // is not necessary. + MatcherPerBackend m_backendMatcher; + TracingJSON m_entireConfig; + + auto init() -> void; + +public: + /** + * @brief For default construction. + */ + explicit JsonMatcher(); + + /** + * @brief Initialize JSON matcher from a parsed JSON config. + * + * Will go through the backends' configurations (keys defined by + * `backendKeys` in JSON_internal.hpp) and check for dataset-specific + * configurations. It will then construct: + * + * 1. A default configuration. + * 2. Matchers for retrieving dataset-specific configurations. + * + * @param config The parsed JSON configuration as specified by the user. + */ + JsonMatcher(openPMD::json::TracingJSON config); + + /** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. + * @param backendName The backend name for which to resolve the pattern. 
+ * @return The matched JSON configuration, as a string. + */ + auto get(std::string const &datasetPath, std::string const &backendName) + -> ParsedConfig; + + /** + * @brief Get the default JSON config. + * @param backendName The backend name for which to resolve the pattern. + * + * @return The default JSON configuration. + */ + auto getDefault(std::string const &backendName) -> TracingJSON; + + auto initBackendLazily(std::string const &backendName) -> void; +}; +} // namespace openPMD::json diff --git a/include/openPMD/auxiliary/JSON_internal.hpp b/include/openPMD/auxiliary/JSON_internal.hpp index 0ce32f2b14..c608bd7f9f 100644 --- a/include/openPMD/auxiliary/JSON_internal.hpp +++ b/include/openPMD/auxiliary/JSON_internal.hpp @@ -48,7 +48,7 @@ namespace json struct ParsedConfig { - nlohmann::json config; + nlohmann::json config = nlohmann::json::object(); SupportedLanguages originallySpecifiedAs{SupportedLanguages::JSON}; }; @@ -261,7 +261,8 @@ namespace json * Vector containing the lower-case keys to the single backends' * configurations. */ - extern std::vector backendKeys(); + constexpr std::array backendKeys{ + "adios2", "json", "toml", "hdf5"}; /** * Function that can be called after reading all global options from the @@ -275,8 +276,10 @@ namespace json * Like merge() as defined in JSON.hpp, but this overload works directly * on nlohmann::json values. 
*/ - nlohmann::json & - merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite); + nlohmann::json &merge_internal( + nlohmann::json &defaultVal, + nlohmann::json const &overwrite, + bool do_prune); nlohmann::json &filterByTemplate( nlohmann::json &defaultVal, nlohmann::json const &positiveMask); diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index a77d8fe524..732b2d1b5c 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -208,6 +208,7 @@ class Attributable template friend T &internal::makeOwning(T &self, Series); friend class StatefulSnapshotsContainer; + friend class internal::AttributableData; protected: // tag for internal constructor diff --git a/include/openPMD/backend/Writable.hpp b/include/openPMD/backend/Writable.hpp index be36f47758..bfb9c67e03 100644 --- a/include/openPMD/backend/Writable.hpp +++ b/include/openPMD/backend/Writable.hpp @@ -103,6 +103,8 @@ class Writable final template friend class Span; friend void debug::printDirty(Series const &); + friend struct Parameter; + friend struct Parameter; private: Writable(internal::AttributableData *); diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 647c31d856..d90fe7c8c7 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -34,6 +34,7 @@ #include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Mpi.hpp" #include "openPMD/auxiliary/StringManip.hpp" @@ -106,7 +107,6 @@ std::optional joinedDimension(adios2::Dims const &dims) ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( AbstractIOHandler *handler, MPI_Comm communicator, - json::TracingJSON cfg, std::string engineType, std::string specifiedExtension) : AbstractIOHandlerImplCommon(handler) @@ -116,7 
+116,7 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( , m_userSpecifiedExtension{std::move(specifiedExtension)} { init( - std::move(cfg), + handler->jsonMatcher->getDefault("adios2"), /* callbackWriteAttributesFromRank = */ [communicator, this](nlohmann::json const &attribute_writing_ranks) { int rank = 0; @@ -158,7 +158,6 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( AbstractIOHandler *handler, - json::TracingJSON cfg, std::string engineType, std::string specifiedExtension) : AbstractIOHandlerImplCommon(handler) @@ -166,7 +165,7 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( , m_engineType(std::move(engineType)) , m_userSpecifiedExtension(std::move(specifiedExtension)) { - init(std::move(cfg), [](auto const &...) {}); + init(handler->jsonMatcher->getDefault("adios2"), [](auto const &...) {}); } ADIOS2IOHandlerImpl::~ADIOS2IOHandlerImpl() @@ -375,6 +374,35 @@ ADIOS2IOHandlerImpl::getOperators() return getOperators(m_config); } +template +auto ADIOS2IOHandlerImpl::getDatasetOperators( + Parameter const ¶meters, Writable *writable, std::string const &varName) + -> std::vector +{ + std::vector operators; + json::TracingJSON options = + parameters.template compileJSONConfig( + writable, *m_handler->jsonMatcher, "adios2"); + if (options.json().contains("adios2")) + { + json::TracingJSON datasetConfig(options["adios2"]); + auto datasetOperators = getOperators(datasetConfig); + + operators = datasetOperators ? 
std::move(datasetOperators.value()) + : defaultOperators; + } + else + { + operators = defaultOperators; + } + parameters.warnUnusedParameters( + options, + "adios2", + "Warning: parts of the backend configuration for ADIOS2 dataset '" + + varName + "' remain unused:\n"); + return operators; +} + using AcceptedEndingsForEngine = std::map; std::string ADIOS2IOHandlerImpl::fileSuffix(bool verbose) const @@ -784,26 +812,8 @@ void ADIOS2IOHandlerImpl::createDataset( filePos->gd = GroupOrDataset::DATASET; auto const varName = nameOfVariable(writable); - std::vector operators; - json::TracingJSON options = - json::parseOptions(parameters.options, /* considerFiles = */ false); - if (options.json().contains("adios2")) - { - json::TracingJSON datasetConfig(options["adios2"]); - auto datasetOperators = getOperators(datasetConfig); - - operators = datasetOperators ? std::move(datasetOperators.value()) - : defaultOperators; - } - else - { - operators = defaultOperators; - } - parameters.warnUnusedParameters( - options, - "adios2", - "Warning: parts of the backend configuration for ADIOS2 dataset '" + - varName + "' remain unused:\n"); + std::vector operators = + getDatasetOperators(parameters, writable, varName); // cast from openPMD::Extent to adios2::Dims adios2::Dims shape(parameters.extent.begin(), parameters.extent.end()); @@ -1008,13 +1018,24 @@ void ADIOS2IOHandlerImpl::openDataset( auto &fileData = getFileData(file, IfFileNotOpen::ThrowError); *parameters.dtype = detail::fromADIOS2Type(fileData.m_IO.VariableType(varName)); + + /* + * Technically, the only reason to set read-time operators is for specifying + * decompression threads. This needs not happen at a per-dataset level. + * However, users may apply the same JSON/TOML config for writing and + * reading, so the dataset-specific configuration should still be explored + * here. 
+ */ + std::vector operators = + getDatasetOperators(parameters, writable, varName); switchAdios2VariableType( *parameters.dtype, this, file, varName, parameters, - fileData.stepSelection()); + fileData.stepSelection(), + operators); writable->written = true; } @@ -2181,7 +2202,9 @@ namespace detail InvalidatableFile const &file, const std::string &varName, Parameter ¶meters, - std::optional stepSelection) + std::optional stepSelection, + std::vector const + &operators) { auto &fileData = impl->getFileData( file, ADIOS2IOHandlerImpl::IfFileNotOpen::ThrowError); @@ -2224,7 +2247,7 @@ ERROR: Variable ')"[1] + varName + } // Operators in reading needed e.g. for setting decompression threads - for (auto const &operation : impl->defaultOperators) + for (auto const &operation : operators) { if (operation.op) { @@ -2349,35 +2372,34 @@ ERROR: Variable ')"[1] + varName + #if openPMD_HAVE_MPI ADIOS2IOHandler::ADIOS2IOHandler( + std::optional> initialize_from, std::string path, openPMD::Access at, MPI_Comm comm, json::TracingJSON options, std::string engineType, std::string specifiedExtension) - : AbstractIOHandler(std::move(path), at, comm) - , m_impl{ - this, - comm, + : AbstractIOHandler( + std::move(initialize_from), + std::move(path), + at, std::move(options), - std::move(engineType), - std::move(specifiedExtension)} + comm) + , m_impl{this, comm, std::move(engineType), std::move(specifiedExtension)} {} #endif ADIOS2IOHandler::ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access at, json::TracingJSON options, std::string engineType, std::string specifiedExtension) - : AbstractIOHandler(std::move(path), at) - , m_impl{ - this, - std::move(options), - std::move(engineType), - std::move(specifiedExtension)} + : AbstractIOHandler( + std::move(initialize_from), std::move(path), at, std::move(options)) + , m_impl{this, std::move(engineType), std::move(specifiedExtension)} {} std::future @@ -2390,30 +2412,36 @@ 
ADIOS2IOHandler::flush(internal::ParsedFlushParams &flushParams) #if openPMD_HAVE_MPI ADIOS2IOHandler::ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access at, MPI_Comm comm, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - json::TracingJSON, + json::TracingJSON config, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string) - : AbstractIOHandler(std::move(path), at, comm) + : AbstractIOHandler( + std::move(initialize_from), + std::move(path), + at, + std::move(config), + comm) {} #endif // openPMD_HAVE_MPI ADIOS2IOHandler::ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access at, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - json::TracingJSON, + json::TracingJSON config, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string) - : AbstractIOHandler(std::move(path), at) + : AbstractIOHandler( + std::move(initialize_from), std::move(path), at, std::move(config)) {} std::future ADIOS2IOHandler::flush(internal::ParsedFlushParams &) diff --git a/src/IO/AbstractIOHandler.cpp b/src/IO/AbstractIOHandler.cpp index c8d3412fe2..f2a30b9974 100644 --- a/src/IO/AbstractIOHandler.cpp +++ b/src/IO/AbstractIOHandler.cpp @@ -23,6 +23,8 @@ #include "openPMD/Error.hpp" #include "openPMD/IO/FlushParametersInternal.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" + #include namespace openPMD::auxiliary @@ -121,4 +123,52 @@ bool AbstractIOHandler::fullSupportForVariableBasedEncoding() const { return false; } + +#if openPMD_HAVE_MPI +template <> +AbstractIOHandler::AbstractIOHandler( + std::optional> initialize_from, + std::string path, + Access at, + json::TracingJSON &&jsonConfig, + MPI_Comm) + : AbstractIOHandler(std::move(initialize_from)) +{ + jsonMatcher = std::make_unique(std::move(jsonConfig)); + directory = std::move(path); + m_backendAccess = at; + 
m_frontendAccess = at; +} +#endif + +template <> +AbstractIOHandler::AbstractIOHandler( + std::optional> initialize_from, + std::string path, + Access at, + json::TracingJSON &&jsonConfig) + : AbstractIOHandler(std::move(initialize_from)) +{ + jsonMatcher = std::make_unique(std::move(jsonConfig)); + directory = std::move(path); + m_backendAccess = at; + m_frontendAccess = at; +} + +AbstractIOHandler::AbstractIOHandler( + std::optional> initialize_from) +{ + if (initialize_from.has_value() && *initialize_from) + { + this->operator=(std::move(**initialize_from)); + } +} + +AbstractIOHandler::~AbstractIOHandler() = default; +// std::queue::queue(queue&&) is not noexcept +// NOLINTNEXTLINE(performance-noexcept-move-constructor) +AbstractIOHandler::AbstractIOHandler(AbstractIOHandler &&) = default; + +AbstractIOHandler & +AbstractIOHandler::operator=(AbstractIOHandler &&) noexcept = default; } // namespace openPMD diff --git a/src/IO/AbstractIOHandlerHelper.cpp b/src/IO/AbstractIOHandlerHelper.cpp index 8576343e5d..55e7f9cb5d 100644 --- a/src/IO/AbstractIOHandlerHelper.cpp +++ b/src/IO/AbstractIOHandlerHelper.cpp @@ -65,6 +65,7 @@ namespace #if openPMD_HAVE_MPI template <> std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, @@ -78,10 +79,16 @@ std::unique_ptr createIOHandler( { case Format::HDF5: return constructIOHandler( - "HDF5", path, access, comm, std::move(options)); + "HDF5", + std::move(initialize_from), + path, + access, + comm, + std::move(options)); case Format::ADIOS2_BP: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -91,6 +98,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_BP4: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -100,6 +108,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_BP5: return constructIOHandler( "ADIOS2", + std::move(initialize_from), 
std::move(path), access, comm, @@ -109,6 +118,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_SST: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -118,6 +128,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_SSC: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -127,6 +138,7 @@ std::unique_ptr createIOHandler( case Format::JSON: return constructIOHandler( "JSON", + std::move(initialize_from), path, access, comm, @@ -136,6 +148,7 @@ std::unique_ptr createIOHandler( case Format::TOML: return constructIOHandler( "JSON", + std::move(initialize_from), path, access, comm, @@ -153,6 +166,7 @@ std::unique_ptr createIOHandler( template <> std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, @@ -165,10 +179,15 @@ std::unique_ptr createIOHandler( { case Format::HDF5: return constructIOHandler( - "HDF5", path, access, std::move(options)); + "HDF5", + std::move(initialize_from), + path, + access, + std::move(options)); case Format::ADIOS2_BP: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -177,6 +196,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_BP4: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -185,6 +205,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_BP5: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -193,6 +214,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_SST: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -201,6 +223,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_SSC: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, 
std::move(options), @@ -209,6 +232,7 @@ std::unique_ptr createIOHandler( case Format::JSON: return constructIOHandler( "JSON", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -217,6 +241,7 @@ std::unique_ptr createIOHandler( case Format::TOML: return constructIOHandler( "JSON", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -231,12 +256,14 @@ std::unique_ptr createIOHandler( } std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, std::string originalExtension) { return createIOHandler( + std::move(initialize_from), std::move(path), access, format, diff --git a/src/IO/DummyIOHandler.cpp b/src/IO/DummyIOHandler.cpp index 7882c9d5e3..9d8a7fac1a 100644 --- a/src/IO/DummyIOHandler.cpp +++ b/src/IO/DummyIOHandler.cpp @@ -19,14 +19,21 @@ * If not, see . */ #include "openPMD/IO/DummyIOHandler.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" #include +#include #include namespace openPMD { DummyIOHandler::DummyIOHandler(std::string path, Access at) - : AbstractIOHandler(std::move(path), at) + : AbstractIOHandler( + std::nullopt, + std::move(path), + at, + json::TracingJSON( + nlohmann::json::object(), json::SupportedLanguages::JSON)) {} void DummyIOHandler::enqueue(IOTask const &) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 48e24fd89b..f852049463 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -37,6 +37,7 @@ #include "openPMD/IO/HDF5/HDF5FilePosition.hpp" #include "openPMD/IO/IOTask.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/auxiliary/Mpi.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" @@ -75,9 +76,7 @@ namespace openPMD #endif HDF5IOHandlerImpl::HDF5IOHandlerImpl( - AbstractIOHandler *handler, - json::TracingJSON config, - bool do_warn_unused_params) + 
AbstractIOHandler *handler, bool do_warn_unused_params) : AbstractIOHandlerImpl(handler) , m_datasetTransferProperty{H5P_DEFAULT} , m_fileAccessProperty{H5P_DEFAULT} @@ -143,6 +142,8 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( m_H5T_LONG_DOUBLE_80_LE >= 0, "[HDF5] Internal error: Failed to create 128-bit complex long double"); + auto config = handler->jsonMatcher->getDefault("hdf5"); + // JSON option can overwrite env option: if (config.json().contains("hdf5")) { @@ -174,7 +175,8 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( json::filterByTemplate( m_global_flush_config, nlohmann::json::parse(flush_cfg_mask)); auto init_json_shadow = nlohmann::json::parse(init_json_shadow_str); - json::merge(m_config.getShadow(), init_json_shadow); + json::merge_internal( + m_config.getShadow(), init_json_shadow, /* do_prune = */ false); } // unused params @@ -502,13 +504,15 @@ void HDF5IOHandlerImpl::createDataset( } json::TracingJSON config = [&]() { - auto parsed_config = json::parseOptions( - parameters.options, /* considerFiles = */ false); + auto parsed_config = + parameters.compileJSONConfig( + writable, *m_handler->jsonMatcher, "hdf5"); if (auto hdf5_config_it = parsed_config.config.find("hdf5"); hdf5_config_it != parsed_config.config.end()) { auto copy = m_global_dataset_config; - json::merge(copy, hdf5_config_it.value()); + json::merge_internal( + copy, hdf5_config_it.value(), /* do_prune = */ true); hdf5_config_it.value() = std::move(copy); } else @@ -3007,9 +3011,13 @@ std::future HDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) #if openPMD_HAVE_HDF5 HDF5IOHandler::HDF5IOHandler( - std::string path, Access at, json::TracingJSON config) - : AbstractIOHandler(std::move(path), at) - , m_impl{new HDF5IOHandlerImpl(this, std::move(config))} + std::optional> initialize_from, + std::string path, + Access at, + json::TracingJSON config) + : AbstractIOHandler( + std::move(initialize_from), std::move(path), at, std::move(config)) + , m_impl{new HDF5IOHandlerImpl(this)} {} 
HDF5IOHandler::~HDF5IOHandler() = default; @@ -3021,11 +3029,12 @@ std::future HDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) #else HDF5IOHandler::HDF5IOHandler( + std::optional> initialize_from, std::string path, Access at, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] json::TracingJSON config) - : AbstractIOHandler(std::move(path), at) + json::TracingJSON config) + : AbstractIOHandler( + std::move(initialize_from), std::move(path), at, std::move(config)) { throw std::runtime_error("openPMD-api built without HDF5 support"); } diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index e1192c8a9c..404b33c888 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -58,9 +58,18 @@ namespace openPMD #endif ParallelHDF5IOHandler::ParallelHDF5IOHandler( - std::string path, Access at, MPI_Comm comm, json::TracingJSON config) - : AbstractIOHandler(std::move(path), at, comm) - , m_impl{new ParallelHDF5IOHandlerImpl(this, comm, std::move(config))} + std::optional> initialize_from, + std::string path, + Access at, + MPI_Comm comm, + json::TracingJSON config) + : AbstractIOHandler( + std::move(initialize_from), + std::move(path), + at, + std::move(config), + comm) + , m_impl{new ParallelHDF5IOHandlerImpl(this, comm)} {} ParallelHDF5IOHandler::~ParallelHDF5IOHandler() = default; @@ -72,7 +81,8 @@ ParallelHDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) hdf5_config_it != params.backendConfig.json().end()) { auto copied_global_cfg = m_impl->m_global_flush_config; - json::merge(copied_global_cfg, hdf5_config_it.value()); + json::merge_internal( + copied_global_cfg, hdf5_config_it.value(), /* do_prune = */ true); hdf5_config_it.value() = std::move(copied_global_cfg); } else @@ -83,8 +93,8 @@ ParallelHDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) } ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( - AbstractIOHandler *handler, MPI_Comm comm, 
json::TracingJSON config) - : HDF5IOHandlerImpl{handler, std::move(config), /* do_warn_unused_params = */ false} + AbstractIOHandler *handler, MPI_Comm comm) + : HDF5IOHandlerImpl{handler, /* do_warn_unused_params = */ false} , m_mpiComm{comm} , m_mpiInfo{MPI_INFO_NULL} /* MPI 3.0+: MPI_INFO_ENV */ { @@ -422,22 +432,27 @@ ParallelHDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) #if openPMD_HAVE_MPI ParallelHDF5IOHandler::ParallelHDF5IOHandler( + std::optional> initialize_from, std::string path, Access at, MPI_Comm comm, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] json::TracingJSON config) - : AbstractIOHandler(std::move(path), at, comm) + json::TracingJSON config) + : AbstractIOHandler( + std::move(initialize_from), + std::move(path), + at, + std::move(config), + comm) { throw std::runtime_error("openPMD-api built without HDF5 support"); } #else ParallelHDF5IOHandler::ParallelHDF5IOHandler( + std::optional> initialize_from, std::string const &path, Access at, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] json::TracingJSON config) - : AbstractIOHandler(path, at) + json::TracingJSON config) + : AbstractIOHandler(std::move(initialize_from), path, at, std::move(config)) { throw std::runtime_error( "openPMD-api built without parallel support and without HDF5 support"); diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index 0692547745..50fe180a40 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -19,10 +19,12 @@ * If not, see . 
*/ #include "openPMD/IO/IOTask.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/backend/Attributable.hpp" #include // std::cerr +#include namespace openPMD { @@ -32,8 +34,7 @@ Writable *getWritable(Attributable *a) } template <> -void Parameter::warnUnusedParameters< - json::TracingJSON>( +void AbstractParameter::warnUnusedParameters( json::TracingJSON &config, std::string const ¤tBackendName, std::string const &warningMessage) @@ -50,7 +51,7 @@ void Parameter::warnUnusedParameters< auto shadow = config.invertShadow(); // The backends are supposed to deal with this // Only global options here - for (auto const &backendKey : json::backendKeys()) + for (auto const &backendKey : json::backendKeys) { if (backendKey != currentBackendName) { @@ -74,6 +75,57 @@ void Parameter::warnUnusedParameters< } } +namespace +{ + template + json::ParsedConfig doCompileJSONConfig( + Attributable const &attri, + json::JsonMatcher &jsonMatcher, + std::string const &backendName, + Functor &&transformResult) + { + auto path = attri.myPath().openPMDPath(); + auto base_config = jsonMatcher.get(path, backendName); + json::ParsedConfig res{ + std::move(base_config.config), base_config.originallySpecifiedAs}; + std::forward(transformResult)(res); + return res; + } +} // namespace + +template <> +json::ParsedConfig Parameter::compileJSONConfig( + Writable const *writable, + json::JsonMatcher &jsonMatcher, + std::string const &backendName) const +{ + auto attri = writable->attributable->asInternalCopyOf(); + return doCompileJSONConfig( + attri, jsonMatcher, backendName, [&](json::ParsedConfig &base_config) { + auto manual_config = + json::parseOptions(options, /* considerFiles = */ false); + json::merge_internal( + base_config.config, + manual_config.config, + /* do_prune = */ true); + base_config.originallySpecifiedAs = + (options.empty() || options == "{}") + ? 
base_config.originallySpecifiedAs + : manual_config.originallySpecifiedAs; + }); +} + +template <> +json::ParsedConfig Parameter::compileJSONConfig( + Writable const *writable, + json::JsonMatcher &jsonMatcher, + std::string const &backendName) +{ + auto attri = writable->attributable->asInternalCopyOf(); + return doCompileJSONConfig( + attri, jsonMatcher, backendName, [](auto const &) {}); +} + namespace internal { std::string operationAsString(Operation op) diff --git a/src/IO/JSON/JSONIOHandler.cpp b/src/IO/JSON/JSONIOHandler.cpp index d2a6217eb5..2102837142 100644 --- a/src/IO/JSON/JSONIOHandler.cpp +++ b/src/IO/JSON/JSONIOHandler.cpp @@ -26,26 +26,28 @@ namespace openPMD JSONIOHandler::~JSONIOHandler() = default; JSONIOHandler::JSONIOHandler( + std::optional> initialize_from, std::string path, Access at, openPMD::json::TracingJSON jsonCfg, JSONIOHandlerImpl::FileFormat format, std::string originalExtension) - : AbstractIOHandler{std::move(path), at} - , m_impl{this, std::move(jsonCfg), format, std::move(originalExtension)} + : AbstractIOHandler{std::move(initialize_from), std::move(path), at, std::move(jsonCfg)} + , m_impl{this, format, std::move(originalExtension)} {} #if openPMD_HAVE_MPI JSONIOHandler::JSONIOHandler( + std::optional> initialize_from, std::string path, Access at, MPI_Comm comm, openPMD::json::TracingJSON jsonCfg, JSONIOHandlerImpl::FileFormat format, std::string originalExtension) - : AbstractIOHandler{std::move(path), at} - , m_impl{JSONIOHandlerImpl{ - this, comm, std::move(jsonCfg), format, std::move(originalExtension)}} + : AbstractIOHandler{std::move(initialize_from), std::move(path), at, std::move(jsonCfg)} + , m_impl{ + JSONIOHandlerImpl{this, comm, format, std::move(originalExtension)}} {} #endif diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index a432737188..fb33d1cd29 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -21,12 +21,12 @@ #include 
"openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/Datatype.hpp" -#include "openPMD/DatatypeHelpers.hpp" #include "openPMD/Error.hpp" #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/AbstractIOHandlerImpl.hpp" #include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" @@ -387,21 +387,19 @@ JSONIOHandlerImpl::getBackendConfig(openPMD::json::TracingJSON &config) const JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, - openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - init(std::move(config)); + init(handler->jsonMatcher->getDefault(backendConfigKey())); } #if openPMD_HAVE_MPI JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, MPI_Comm comm, - openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) @@ -409,7 +407,7 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - init(std::move(config)); + init(handler->jsonMatcher->getDefault(backendConfigKey())); } #endif diff --git a/src/Series.cpp b/src/Series.cpp index c0733e34cc..47a642a88d 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -841,6 +841,7 @@ void Series::init( std::unique_ptr parsed_input, json::TracingJSON tracing_json) { auto io_handler = createIOHandler( + std::nullopt, parsed_input->path, at, parsed_input->format, @@ -882,7 +883,11 @@ void Series::init( true, std::forward(comm)...); + auto &writable = s.get()->m_writable; + auto io_handler = createIOHandler( + writable.IOHandler ? 
std::move(*writable.IOHandler) + : std::nullopt, parsed_input->path, at, parsed_input->format, @@ -1053,25 +1058,18 @@ void Series::initSeries( auto &writable = series->m_writable; /* - * In Access modes READ_LINEAR and APPEND, the Series constructor might have - * emplaced a temporary IOHandler. Check if this is the case. + * In access modes APPEND and READ_LINEAR, a dummy IO Handler might have + * been emplaced. The real IO Handler (ioHandler) was created from this + * intermediate handler, moving from it. + * In that case, the pointer is still valid, it just points to an empty + * optional at the moment. Reuse the pointer, so that any objects that + * might have been initialized with the old pointer are still valid. */ if (writable.IOHandler) { - if (writable.IOHandler->has_value()) - { - /* - * A temporary IOHandler has been used. In this case, copy the - * values from that IOHandler over into the real one. - */ - ioHandler->operator=(***writable.IOHandler); - *writable.IOHandler = std::move(ioHandler); - } - else - { - throw error::Internal( - "Control flow error. This should not happen."); - } + *writable.IOHandler = + std::make_optional>( + std::move(ioHandler)); } else { @@ -1081,7 +1079,7 @@ void Series::initSeries( } series.iterations.linkHierarchy(writable); - series.iterations.writable().ownKeyWithinParent = "iterations"; + series.iterations.writable().ownKeyWithinParent = "data"; series.m_rankTable.m_attributable.linkHierarchy(writable); series.m_name = input->name; diff --git a/src/auxiliary/JSON.cpp b/src/auxiliary/JSON.cpp index 21d5e6c276..84e7006cbb 100644 --- a/src/auxiliary/JSON.cpp +++ b/src/auxiliary/JSON.cpp @@ -578,6 +578,12 @@ nlohmann::json &lowerCase(nlohmann::json &json) * We use "\vnum" to indicate "any array index". 
*/ "\vnum", + "parameters"}, + {"adios2", + "dataset", + "\vnum", + "operators", + "\vnum", "parameters"}}; for (auto const &ignored : ignoredPaths) { @@ -621,17 +627,12 @@ std::optional asLowerCaseStringDynamic(nlohmann::json const &value) return maybeString; } -std::vector backendKeys() -{ - return {"adios2", "json", "toml", "hdf5"}; -} - void warnGlobalUnusedOptions(TracingJSON const &config) { auto shadow = config.invertShadow(); // The backends are supposed to deal with this // Only global options here - for (auto const &backendKey : json::backendKeys()) + for (auto const &backendKey : json::backendKeys) { shadow.erase(backendKey); } @@ -656,8 +657,8 @@ void warnGlobalUnusedOptions(TracingJSON const &config) } } -nlohmann::json & -merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite) +nlohmann::json &merge_internal( + nlohmann::json &defaultVal, nlohmann::json const &overwrite, bool do_prune) { if (defaultVal.is_object() && overwrite.is_object()) { @@ -665,15 +666,18 @@ merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite) for (auto it = overwrite.begin(); it != overwrite.end(); ++it) { auto &valueInDefault = defaultVal[it.key()]; - merge(valueInDefault, it.value()); - if (valueInDefault.is_null()) + merge_internal(valueInDefault, it.value(), do_prune); + if (do_prune && valueInDefault.is_null()) { prunedKeys.push(it.key()); } } - for (; !prunedKeys.empty(); prunedKeys.pop()) + if (do_prune) { - defaultVal.erase(prunedKeys.front()); + for (; !prunedKeys.empty(); prunedKeys.pop()) + { + defaultVal.erase(prunedKeys.front()); + } } } else @@ -693,11 +697,22 @@ merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite) return defaultVal; } -std::string merge(std::string const &defaultValue, std::string const &overwrite) +template +std::string merge_impl( + std::string const &defaultValue, + std::string const &overwrite, + MPI_Comm_t &&...comm) { - auto [res, returnFormat] = - parseOptions(defaultValue, /* considerFiles = */ 
false); - merge(res, parseOptions(overwrite, /* considerFiles = */ false).config); + auto res = parseOptions( + defaultValue, + std::forward(comm)..., + /* considerFiles = */ true) + .config; + auto [second, returnFormat] = parseOptions( + overwrite, + std::forward(comm)..., + /* considerFiles = */ true); + merge_internal(res, second, /* do_prune = */ true); switch (returnFormat) { case SupportedLanguages::JSON: @@ -713,6 +728,21 @@ std::string merge(std::string const &defaultValue, std::string const &overwrite) throw std::runtime_error("Unreachable!"); } +std::string merge(std::string const &defaultValue, std::string const &overwrite) +{ + return merge_impl(defaultValue, overwrite); +} + +#if openPMD_HAVE_MPI +std::string merge( + std::string const &defaultValue, + std::string const &overwrite, + MPI_Comm comm) +{ + return merge_impl(defaultValue, overwrite, comm); +} +#endif + nlohmann::json & filterByTemplate(nlohmann::json &defaultVal, nlohmann::json const &positiveMask) { diff --git a/src/auxiliary/JSONMatcher.cpp b/src/auxiliary/JSONMatcher.cpp new file mode 100644 index 0000000000..689fbc8333 --- /dev/null +++ b/src/auxiliary/JSONMatcher.cpp @@ -0,0 +1,288 @@ +#include "openPMD/auxiliary/JSONMatcher.hpp" +#include "openPMD/Error.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" + +#include +#include +#include +#include + +namespace openPMD::json +{ +// Anonymous namespace so these helpers don't get exported +namespace +{ + /** + * @brief Read a single JSON pattern of the form {"select": ..., "cfg": ...} + * + * The "select" key is optional, indicating the default configuration if it + * is missing. + * + * @param backend_name For error messages. + * @param index_in_list For error messages. + * @param patterns Output parameter: Emplace a parsed pattern into this + * list. + * @param defaultConfig Output parameter: If the pattern was the default + * pattern, emplace it here. + * @param object The JSON object that is parsed as the pattern. 
+ * @return Nothing; results are conveyed via the output parameters
+ * 'patterns' and 'defaultConfig'.
+ */
+ auto readPattern(
+ std::string const &backend_name,
+ size_t index_in_list,
+ std::vector &patterns,
+ std::optional &defaultConfig,
+ nlohmann::json object) -> void;
+} // namespace
+
+Pattern::Pattern(std::string const &pattern_in, nlohmann::json config_in)
+ : config(std::move(config_in))
+{
+ // transform the regex such that the path to the Iteration is optional
+ std::stringstream build_pattern;
+ build_pattern << "(/data/[0-9]+/)?(" << pattern_in << ")";
+ // we construct the patterns once and use them often, so let's ask for
+ // some optimization
+ pattern = std::regex(
+ build_pattern.str(),
+ std::regex_constants::egrep | std::regex_constants::optimize);
+}
+
+void MatcherPerBackend::init(TracingJSON tracing_config)
+{
+ auto &config = tracing_config.json();
+ if (config.is_object())
+ {
+ return;
+ }
+ else if (config.is_array())
+ {
+ m_patterns.reserve(config.size());
+ std::optional defaultConfig;
+ // enhanced PIConGPU-defined layout
+ for (size_t i = 0; i < config.size(); ++i)
+ {
+ readPattern(
+ backendName,
+ i,
+ m_patterns,
+ defaultConfig,
+ std::move(config.at(i)));
+ }
+ // now replace the pattern list with the default config
+ tracing_config.json() =
+ std::move(defaultConfig).value_or(nlohmann::json::object());
+ }
+ else
+ {
+ throw error::BackendConfigSchema(
+ {backendName, "dataset"}, "Expecting an object or an array.");
+ }
+}
+
+/**
+ * @brief Get the JSON config of the first pattern that matches a dataset path.
+ *
+ * @param datasetPath The dataset path, matched against the stored regexes.
+ * @return The matched JSON configuration, or a null JSON value if no
+ * pattern matches. 
+ */ +nlohmann::json const & +MatcherPerBackend::get(std::string const &datasetPath) const +{ + for (auto const &pattern : m_patterns) + { + if (std::regex_match(datasetPath, pattern.pattern)) + { + return pattern.config; + } + } + static nlohmann::json const emptyConfig; // null + return emptyConfig; +} + +auto JsonMatcher::init() -> void +{ + // Copy required since the config will be modified + if (!m_entireConfig.json().is_object()) + { + throw error::BackendConfigSchema( + {}, "Expected an object for the JSON configuration."); + } +} + +MatcherPerBackend::MatcherPerBackend() = default; + +MatcherPerBackend::MatcherPerBackend( + std::string backendName_in, TracingJSON config) + : backendName(std::move(backendName_in)) +{ + init(std::move(config)); +} + +JsonMatcher::JsonMatcher() = default; + +JsonMatcher::JsonMatcher(TracingJSON entireConfig) + : m_entireConfig(std::move(entireConfig)) +{ + init(); +} + +auto JsonMatcher::get( + std::string const &datasetPath, std::string const &backendName) + -> ParsedConfig +{ + initBackendLazily(backendName); + + nlohmann::json result = nlohmann::json::object(); + // might not have been initialized due to unspecified configuration + if (m_backendMatcher.backendName == backendName) + { + auto const &datasetConfig = m_backendMatcher.get(datasetPath); + if (!datasetConfig.empty()) + { + result[backendName]["dataset"] = datasetConfig; + } + } + + return {result, m_entireConfig.originallySpecifiedAs}; +} + +auto JsonMatcher::getDefault(std::string const &backendName) -> TracingJSON +{ + initBackendLazily(backendName); + return m_entireConfig; +} + +auto JsonMatcher::initBackendLazily(std::string const &backendName) -> void +{ + if (m_backendMatcher.backendName == backendName) + { + // already initialized + return; + } + if (!m_entireConfig.json().contains(backendName)) + { + return; + } + auto const &backendConfig = m_entireConfig.json({backendName}); + if (!backendConfig.is_object()) + { + throw error::BackendConfigSchema( + 
{backendName},
+            "Each backend's configuration must be a JSON object (config "
+            "for backend " +
+                backendName + ").");
+    }
+    else if (!backendConfig.contains("dataset"))
+    {
+        return;
+    }
+    m_backendMatcher =
+        MatcherPerBackend(backendName, m_entireConfig[backendName]["dataset"]);
+}
+
+namespace
+{
+    auto readPattern(
+        std::string const &backend_name,
+        size_t index_in_list,
+        std::vector<Pattern> &patterns,
+        std::optional<nlohmann::json> &defaultConfig,
+        nlohmann::json object) -> void
+    {
+        constexpr char const *errorMsg = R"END(
+Each single pattern in an dataset-specific JSON/TOML configuration must be
+an object with mandatory key 'cfg' and optional key 'select'.
+When the key 'select' is not specified, the given configuration is used
+for setting up the default dataset configuration upon backend initialization.
+The key 'select' must point to either a single string or an array of strings
+and is interpreted as a regular expression against which the dataset name
+(full path or path within an iteration) must match.)END";
+        auto throw_up = [&](std::string const &additional_info,
+                            auto &&...additional_path) {
+            throw error::BackendConfigSchema(
+                {backend_name,
+                 "dataset",
+                 std::to_string(index_in_list),
+                 additional_path...},
+                additional_info + errorMsg);
+        };
+
+        if (!object.is_object())
+        {
+            throw_up("Not an object!");
+        }
+        if (!object.contains("cfg"))
+        {
+            throw_up("Mandatory key missing: 'cfg'!");
+        }
+        {
+            std::vector<std::string> unrecognized_keys;
+            for (auto it = object.begin(); it != object.end(); ++it)
+            {
+                if (it.key() == "select" || it.key() == "cfg")
+                {
+                    continue;
+                }
+                unrecognized_keys.emplace_back(it.key());
+            }
+            if (!unrecognized_keys.empty())
+            {
+                std::cerr << "[Warning] JSON/TOML config at '" << backend_name
+                          << ".dataset." << index_in_list
+                          << "' has unrecognized keys:";
+                for (auto const &item : unrecognized_keys)
+                {
+                    std::cerr << " '" << item << '\'';
+                }
+                std::cerr << '.'
<< std::endl;
+            }
+        }
+
+        nlohmann::json &cfg = object.at("cfg");
+        if (!object.contains("select"))
+        {
+            if (defaultConfig.has_value())
+            {
+                throw_up("Specified more than one default configuration!");
+            }
+            defaultConfig.emplace(std::move(cfg));
+            return;
+        }
+        else
+        {
+            nlohmann::json const &pattern = object.at("select");
+            std::string pattern_str = [&]() -> std::string {
+                if (pattern.is_string())
+                {
+                    return pattern.get<std::string>();
+                }
+                else if (pattern.is_array())
+                {
+                    std::stringstream res;
+                    res << "($^)";
+                    for (auto const &sub_pattern : pattern)
+                    {
+                        if (!sub_pattern.is_string())
+                        {
+                            throw_up(
+                                "Must be a string or an array of string!",
+                                "select");
+                        }
+                        res << "|(" << sub_pattern.get<std::string>() << ")";
+                    }
+                    return res.str();
+                }
+                else
+                {
+                    throw_up(
+                        "Must be a string or an array of string!", "select");
+                    throw std::runtime_error("Unreachable!");
+                }
+            }();
+            patterns.emplace_back(pattern_str, std::move(cfg));
+            return;
+        }
+    }
+} // namespace
+} // namespace openPMD::json
diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp
index da9e09e2e0..ade77e24d5 100644
--- a/src/backend/Attributable.cpp
+++ b/src/backend/Attributable.cpp
@@ -213,17 +213,14 @@ std::string Attributable::MyPath::openPMDPath() const
 {
     if (group.empty())
     {
-        return std::string();
+        return std::string("/");
     }
     else
     {
         std::stringstream res;
-        auto it = group.begin();
-        auto end = group.end();
-        res << *it++;
-        for (; it != end; ++it)
+        for (auto const &element : group)
         {
-            res << '/' << *it;
+            res << '/' << element;
         }
         return res.str();
     }
diff --git a/src/binding/python/Attributable.cpp b/src/binding/python/Attributable.cpp
index 206e3741aa..59131704c0 100644
--- a/src/binding/python/Attributable.cpp
+++ b/src/binding/python/Attributable.cpp
@@ -502,7 +502,9 @@ void init_Attributable(py::module &m)
             "series_extension", &Attributable::MyPath::seriesExtension)
         .def_readonly("group", &Attributable::MyPath::group)
         .def_readonly("access", &Attributable::MyPath::access)
-
.def_property_readonly("file_path", &Attributable::MyPath::filePath);
+        .def_property_readonly("file_path", &Attributable::MyPath::filePath)
+        .def_property_readonly(
+            "openPMD_path", &Attributable::MyPath::openPMDPath);
 
     py::class_<Attributable>(m, "Attributable")
         .def(py::init())
diff --git a/src/binding/python/Series.cpp b/src/binding/python/Series.cpp
index d737dc8198..11ad2ae651 100644
--- a/src/binding/python/Series.cpp
+++ b/src/binding/python/Series.cpp
@@ -423,12 +423,7 @@ Look for the WriteIterations class for further documentation.
         return series;
     });
 
-    m.def(
-        "merge_json",
-        &json::merge,
-        py::arg("default_value") = "{}",
-        py::arg("overwrite") = "{}",
-        R"END(
+    constexpr char const *docs_merge_json = &R"END(
 Merge two JSON/TOML datasets into one.
 
 Merging rules:
@@ -455,10 +450,48 @@ users to overwrite default options, while keeping any other ones.
 Parameters:
 
 * default_value: A string containing either a JSON or a TOML dataset.
+  If the string begins with an `@`, the JSON/TOML dataset will be
+  read from the filesystem at the specified path.
+  An MPI communicator can be passed to read in parallel.
 * overwrite: A string containing either a JSON or TOML dataset (does not need
   to be the same as `defaultValue`).
+  If the string begins with an `@`, the JSON/TOML dataset will be
+  read from the filesystem at the specified path.
+  An MPI communicator can be passed to read in parallel.
 * returns: The merged dataset, according to the above rules.
-  If `defaultValue` was a JSON dataset, then as a JSON string,
+  If `overwrite` was a JSON dataset, then as a JSON string,
   otherwise as a TOML string.
-    )END");
+    )END"[1];
+
+    m.def(
+        "merge_json",
+        py::overload_cast<std::string const &, std::string const &>(
+            &json::merge),
+        py::arg("default_value") = "{}",
+        py::arg("overwrite") = "{}",
+        docs_merge_json)
+#if openPMD_HAVE_MPI
+        .def(
+            "merge_json",
+            [](std::string const &default_value,
+               std::string const &overwrite,
+               py::object &comm) {
+                auto variant = pythonObjectAsMpiComm(comm);
+                if (auto errorMsg = std::get_if<std::string>(&variant))
+                {
+                    throw std::runtime_error("[merge_json] " + *errorMsg);
+                }
+                else
+                {
+                    py::gil_scoped_release release;
+                    return json::merge(
+                        default_value, overwrite, std::get<MPI_Comm>(variant));
+                }
+            },
+            py::arg("default_value") = "{}",
+            py::arg("overwrite") = "{}",
+            py::arg("comm"),
+            docs_merge_json)
+#endif
+        ;
 }
diff --git a/test/AuxiliaryTest.cpp b/test/AuxiliaryTest.cpp
index 7cc88f3b65..e863dd1527 100644
--- a/test/AuxiliaryTest.cpp
+++ b/test/AuxiliaryTest.cpp
@@ -35,7 +35,8 @@ struct TestHelper : public Attributable
     {
         writable().IOHandler = std::make_shared<std::optional<std::unique_ptr<AbstractIOHandler>>>(
-            createIOHandler(".", Access::CREATE, Format::JSON, ".json"));
+            createIOHandler(
+                std::nullopt, ".", Access::CREATE, Format::JSON, ".json"));
     }
 };
 } // namespace openPMD::test
@@ -150,7 +151,8 @@ TEST_CASE("container_default_test", "[auxiliary]")
     Container<Widget> c = Container<Widget>();
     c.writable().IOHandler = std::make_shared<std::optional<std::unique_ptr<AbstractIOHandler>>>(
-        createIOHandler(".", Access::CREATE, Format::JSON, ".json"));
+        createIOHandler(
+            std::nullopt, ".", Access::CREATE, Format::JSON, ".json"));
 
     REQUIRE(c.empty());
     REQUIRE(c.erase("nonExistentKey") == false);
@@ -189,7 +191,8 @@ TEST_CASE("container_retrieve_test", "[auxiliary]")
     Container<structure> c = Container<structure>();
     c.writable().IOHandler = std::make_shared<std::optional<std::unique_ptr<AbstractIOHandler>>>(
-        createIOHandler(".", Access::CREATE, Format::JSON, ".json"));
+        createIOHandler(
+            std::nullopt, ".", Access::CREATE, Format::JSON, ".json"));
 
     structure s;
     std::string text =
@@ -263,7 +266,8 @@ TEST_CASE("container_access_test", "[auxiliary]")
     Container<Widget> c = Container<Widget>();
     c.writable().IOHandler = std::make_shared<std::optional<std::unique_ptr<AbstractIOHandler>>>(
-        createIOHandler(".",
Access::CREATE, Format::JSON, ".json"));
+        createIOHandler(
+            std::nullopt, ".", Access::CREATE, Format::JSON, ".json"));
 
     c["1firstWidget"] = Widget(0);
     REQUIRE(c.size() == 1);
diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp
index c5d98a73c5..0239e5bab9 100644
--- a/test/CoreTest.cpp
+++ b/test/CoreTest.cpp
@@ -207,81 +207,73 @@ TEST_CASE("myPath", "[core]")
     Series series("../samples/myPath.json", Access::CREATE);
     REQUIRE(pathOf(series) == vec_t{});
     auto iteration = series.iterations[1234];
-    REQUIRE(pathOf(iteration) == vec_t{"iterations", "1234"});
+    REQUIRE(pathOf(iteration) == vec_t{"data", "1234"});
 
     auto writeSomething = [](auto &recordComponent) {
         recordComponent.resetDataset({Datatype::INT, {100}});
         recordComponent.template makeConstant<int>(5678);
     };
 
-    REQUIRE(pathOf(iteration.meshes) == vec_t{"iterations", "1234", "meshes"});
+    REQUIRE(pathOf(iteration.meshes) == vec_t{"data", "1234", "meshes"});
 
     auto scalarMesh = iteration.meshes["e_chargeDensity"];
     REQUIRE(
         pathOf(scalarMesh) ==
-        vec_t{"iterations", "1234", "meshes", "e_chargeDensity"});
+        vec_t{"data", "1234", "meshes", "e_chargeDensity"});
 
     auto scalarMeshComponent = scalarMesh[RecordComponent::SCALAR];
     REQUIRE(
         pathOf(scalarMeshComponent) ==
-        vec_t{"iterations", "1234", "meshes", "e_chargeDensity"});
+        vec_t{"data", "1234", "meshes", "e_chargeDensity"});
     writeSomething(scalarMeshComponent);
 
     auto vectorMesh = iteration.meshes["E"];
-    REQUIRE(pathOf(vectorMesh) == vec_t{"iterations", "1234", "meshes", "E"});
+    REQUIRE(pathOf(vectorMesh) == vec_t{"data", "1234", "meshes", "E"});
 
     auto vectorMeshComponent = vectorMesh["x"];
     REQUIRE(
         pathOf(vectorMeshComponent) ==
-        vec_t{"iterations", "1234", "meshes", "E", "x"});
+        vec_t{"data", "1234", "meshes", "E", "x"});
 
-    REQUIRE(
-        pathOf(iteration.particles) ==
-        vec_t{"iterations", "1234", "particles"});
+    REQUIRE(pathOf(iteration.particles) == vec_t{"data", "1234", "particles"});
 
     auto speciesE = iteration.particles["e"];
-    REQUIRE(pathOf(speciesE) == 
vec_t{"iterations", "1234", "particles", "e"}); + REQUIRE(pathOf(speciesE) == vec_t{"data", "1234", "particles", "e"}); auto speciesPosition = speciesE["position"]; REQUIRE( pathOf(speciesPosition) == - vec_t{"iterations", "1234", "particles", "e", "position"}); + vec_t{"data", "1234", "particles", "e", "position"}); auto speciesPositionX = speciesPosition["x"]; REQUIRE( pathOf(speciesPositionX) == - vec_t{"iterations", "1234", "particles", "e", "position", "x"}); + vec_t{"data", "1234", "particles", "e", "position", "x"}); writeSomething(speciesPositionX); auto speciesWeighting = speciesE["weighting"]; REQUIRE( pathOf(speciesWeighting) == - vec_t{"iterations", "1234", "particles", "e", "weighting"}); + vec_t{"data", "1234", "particles", "e", "weighting"}); auto speciesWeightingX = speciesWeighting[RecordComponent::SCALAR]; REQUIRE( pathOf(speciesWeightingX) == - vec_t{"iterations", "1234", "particles", "e", "weighting"}); + vec_t{"data", "1234", "particles", "e", "weighting"}); writeSomething(speciesWeightingX); REQUIRE( pathOf(speciesE.particlePatches) == - vec_t{"iterations", "1234", "particles", "e", "particlePatches"}); + vec_t{"data", "1234", "particles", "e", "particlePatches"}); auto patchExtent = speciesE.particlePatches["extent"]; REQUIRE( pathOf(patchExtent) == - vec_t{ - "iterations", - "1234", - "particles", - "e", - "particlePatches", - "extent"}); + vec_t{"data", "1234", "particles", "e", "particlePatches", "extent"}); auto patchExtentX = patchExtent["x"]; REQUIRE( pathOf(patchExtentX) == vec_t{ - "iterations", + "data", "1234", "particles", "e", @@ -293,7 +285,7 @@ TEST_CASE("myPath", "[core]") REQUIRE( pathOf(patchNumParticles) == vec_t{ - "iterations", + "data", "1234", "particles", "e", @@ -305,7 +297,7 @@ TEST_CASE("myPath", "[core]") REQUIRE( pathOf(patchNumParticlesComponent) == vec_t{ - "iterations", + "data", "1234", "particles", "e", diff --git a/test/JSONTest.cpp b/test/JSONTest.cpp index 067919bd89..304ad16e15 100644 --- 
a/test/JSONTest.cpp
+++ b/test/JSONTest.cpp
@@ -217,8 +217,8 @@ right = "val"
     }();
 
     REQUIRE(json::merge(leftJson, rightJson) == resJson);
-    REQUIRE(json::merge(leftJson, rightToml) == resJson);
-    REQUIRE(sort_lines(json::merge(leftToml, rightJson)) == resToml);
+    REQUIRE(sort_lines(json::merge(leftJson, rightToml)) == resToml);
+    REQUIRE(json::merge(leftToml, rightJson) == resJson);
     REQUIRE(sort_lines(json::merge(leftToml, rightToml)) == resToml);
 }
 }
diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp
index 59d8bf8570..9b44a25e9b 100644
--- a/test/ParallelIOTest.cpp
+++ b/test/ParallelIOTest.cpp
@@ -274,11 +274,27 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]")
     MPI_Comm_rank(MPI_COMM_WORLD, &mpi_r);
     auto mpi_size = static_cast<uint64_t>(mpi_s);
     auto mpi_rank = static_cast<uint64_t>(mpi_r);
+    std::string chunking_config = "[" + std::to_string(mpi_size) + "]";
+    // clang-format off
+    std::string config = R"(
+        [hdf5]
+        independent_stores = false
+
+        # default config
+        [[hdf5.dataset]]
+        cfg = {chunks = "none"}
+
+        [[hdf5.dataset]]
+        select = "particles/.*/position/.*"
+        cfg = {chunks = [1]}
+
+        [[hdf5.dataset]]
+        select = "particles/.*/positionOffset/x"
+        cfg = {chunks = )" + chunking_config + R"(}
+    )";
+    // clang-format on
     Series o = Series(
-        "../samples/parallel_write.h5",
-        Access::CREATE,
-        MPI_COMM_WORLD,
-        "hdf5.independent_stores = false");
+        "../samples/parallel_write.h5", Access::CREATE, MPI_COMM_WORLD, config);
     o.setAuthor("Parallel HDF5");
 
     ParticleSpecies &e = o.iterations[1].particles["e"];
@@ -291,10 +307,8 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]")
     std::shared_ptr<double> position_local(new double);
     *position_local = position_global[mpi_rank];
 
-    e["position"]["x"].resetDataset(Dataset(
-        determineDatatype(position_local),
-        {mpi_size},
-        "hdf5.dataset.chunks = [1]"));
+    e["position"]["x"].resetDataset(
+        Dataset(determineDatatype(position_local), {mpi_size}));
     e["position"]["x"].storeChunk(position_local, {mpi_rank}, {1});
 
     o.flush("hdf5.independent_stores 
= true");
@@ -308,10 +322,8 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]")
     std::shared_ptr<uint64_t> positionOffset_local(new uint64_t);
     *positionOffset_local = positionOffset_global[mpi_rank];
 
-    e["positionOffset"]["x"].resetDataset(Dataset(
-        determineDatatype(positionOffset_local),
-        {mpi_size},
-        "hdf5.dataset.chunks = [" + std::to_string(mpi_size) + "]"));
+    e["positionOffset"]["x"].resetDataset(
+        Dataset(determineDatatype(positionOffset_local), {mpi_size}));
     e["positionOffset"]["x"].storeChunk(positionOffset_local, {mpi_rank}, {1});
 
     // Test that chunking settings are not carried over to other datasets.
@@ -870,8 +882,20 @@ void file_based_write_read(std::string const &file_ending)
     });
 
     {
+        std::string chunking_config = "[" + std::to_string(global_Nx) + ", " +
+            std::to_string(local_Nz) + "]";
+        // clang-format off
+        std::string out_config = R"(
+            [[hdf5.dataset]]
+            cfg = {chunks = "auto"}
+            [[hdf5.dataset]]
+            select = "meshes/E/.*"
+            cfg = {chunks = )" + chunking_config + R"(}
+)";
+        // clang-format on
+
         // open a parallel series
-        Series series(name, Access::CREATE, MPI_COMM_WORLD);
+        Series series(name, Access::CREATE, MPI_COMM_WORLD, out_config);
         series.setIterationEncoding(IterationEncoding::fileBased);
 
         int const last_step = 100;
@@ -908,10 +932,7 @@ void file_based_write_read(std::string const &file_ending)
         });
 
         auto dataset = io::Dataset(
-            io::determineDatatype(),
-            {global_Nx, global_Nz},
-            "hdf5.dataset.chunks = [" + std::to_string(global_Nx) + ", " +
-                std::to_string(local_Nz) + "]");
+            io::determineDatatype(), {global_Nx, global_Nz});
         E_x.resetDataset(dataset);
 
         Offset chunk_offset = {0, size_t(local_Nz) * mpi_rank};