diff --git a/CHANGELOG.md b/CHANGELOG.md index 54877af..61fb5d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## v0.6.1 +- Add pyFANS support for large strain and fix pyFANS MPI [#124](https://github.com/DataAnalyticsEngineering/FANS/pull/124) - Adds support to write integration point data to disk [#117](https://github.com/DataAnalyticsEngineering/FANS/pull/117) ## v0.6.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 85a7736..91523d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,6 +112,9 @@ find_package(nlohmann_json REQUIRED) # TARGETS # ############################################################################## +set(FANS_VERBOSITY 5 CACHE STRING "Set verbosity level: 0 none, 5 all") +add_definitions(-DVERBOSITY=${FANS_VERBOSITY}) + option(FANS_BUILD_STATIC "Build static library" OFF) if (FANS_BUILD_STATIC) add_library(FANS_FANS STATIC) @@ -150,6 +153,7 @@ target_include_directories(FANS_FANS PUBLIC $ Matmodel *createMatmodel(const Reader &reader); @@ -25,7 +27,7 @@ struct MaterialInfo { * */ template -class MaterialManager { +class MaterialManager : public Serializable { private: MaterialInfo *phase_to_info{nullptr}; // [n_phases] - HOT DATA int n_phases; @@ -110,51 +112,47 @@ class MaterialManager { compute_reference_stiffness(reader); // Print detailed information about material configuration for logging - if (reader.world_rank == 0) { - printf("\n# MaterialManager initialized:\n"); - printf("# Number of material models: %zu\n", models.size()); - printf("# Number of phases: %d\n#\n", n_phases); - - for (size_t i = 0; i < mats.size(); ++i) { - const auto &mg = mats[i]; - printf("# Material Model %zu: %s\n", i + 1, mg["matmodel"].get().c_str()); - - // Print phases - auto phases = mg["phases"].get>(); - printf("# Phases: ["); - for (size_t j = 0; j < phases.size(); ++j) { - printf("%d", phases[j]); - if (j < phases.size() - 1) - printf(", "); - } - printf("]\n"); - - // Print material properties - printf("# Material properties:\n"); - const auto &props = mg["material_properties"]; - for (auto it = props.begin(); it != props.end(); ++it) { - printf("# %s: ", it.key().c_str()); - if (it.value().is_array()) { - printf("["); - for (size_t k = 0; k < it.value().size(); ++k) { - if (it.value()[k].is_number()) { - printf("%.5g", it.value()[k].get()); - } else if (it.value()[k].is_string()) { - printf("\"%s\"", it.value()[k].get().c_str()); - } - if (k < it.value().size() - 1) - printf(", "); + Log::io->info() << "\n# MaterialManager initialized:\n"; + Log::io->info() << "# Number of material models: " << models.size() << "\n"; + Log::io->info() << "# Number of phases: " << n_phases << "\n#\n"; + + for (size_t i = 0; i < mats.size(); ++i) { + const auto &mg = mats[i]; + Log::io->info() << "# Material Model " << i + 1 << ": " << mg["matmodel"].get() << "\n"; + + // Print phases + auto phases = mg["phases"].get>(); + Log::io->info() << "# Phases: ["; + for (size_t j = 0; j < phases.size(); ++j) { + Log::io->info(true) << phases[j]; + if (j < phases.size() - 1) Log::io->info(true) << ", "; + } + Log::io->info(true) << "]\n"; + + // Print material properties + Log::io->info() << "# Material properties:\n"; + const auto &props = mg["material_properties"]; + for (auto it = props.begin(); it != props.end(); ++it) { + Log::io->info() << "# " << it.key() << ": "; + if (it.value().is_array()) { + Log::io->info(true) << "["; + for (size_t k = 0; k < it.value().size(); ++k) { + if (it.value()[k].is_number()) { + Log::io->info(true) << std::setprecision(5) << 
it.value()[k].get() << std::defaultfloat; + } else if (it.value()[k].is_string()) { + Log::io->info(true) << "\"" << it.value()[k].get() << "\""; } - printf("]"); - } else if (it.value().is_number()) { - printf("%.5g", it.value().get()); - } else if (it.value().is_string()) { - printf("\"%s\"", it.value().get().c_str()); + if (k < it.value().size() - 1) Log::io->info(true) << ", "; } - printf("\n"); + Log::io->info(true) << "]"; + } else if (it.value().is_number()) { + Log::io->info(true) << std::setprecision(5) << it.value().get() << std::defaultfloat; + } else if (it.value().is_string()) { + Log::io->info(true) << "\"" << it.value().get() << "\""; } - printf("#\n"); + Log::io->info(true) << "\n"; } + Log::io->info() << "#\n"; } } @@ -204,6 +202,20 @@ class MaterialManager { } } + void register_serialization(registry_t &r) override + { + for (auto *model : models) { + model->register_serialization(r); + } + } + + void init_deserialization() override + { + for (auto *model : models) { + model->init_deserialization(); + } + } + // Update internal variables after converged time step void update_internal_variables() { @@ -213,7 +225,7 @@ class MaterialManager { } // Set macroscale loading gradient for all models - void set_gradient(vector g0) + void set_gradient(const vector& g0) { for (auto *model : models) { model->setGradient(g0); diff --git a/include/general.h b/include/general.h index 1128e8a..1949615 100644 --- a/include/general.h +++ b/include/general.h @@ -58,6 +58,4 @@ inline void FANS_free(V *p) } #endif // FANS_MALLOC_H -#define VERBOSITY 0 - // #define EIGEN_RUNTIME_NO_MALLOC diff --git a/include/logging.h b/include/logging.h new file mode 100755 index 0000000..734a669 --- /dev/null +++ b/include/logging.h @@ -0,0 +1,106 @@ +#ifndef LOGGING_H +#define LOGGING_H + +#include +#include +#include +#include +#include +#include "mpi.h" + +namespace Log { + +enum Level { + Error, + Info, + Warn, + Debug, + All +}; + +[[maybe_unused]] const std::string level_to_string(Level level); + +extern Level active_level; + +class Logger; +class MPI_TraceSync { + public: + explicit MPI_TraceSync(Logger &log, bool append); + ~MPI_TraceSync(); + + template + MPI_TraceSync &operator<<(const T &v); + MPI_TraceSync &operator<<(std::ostream &(*m)(std::ostream &) ); + + private: + Logger &_log; + bool _append; + std::ostringstream _buffer; +}; + +template +MPI_TraceSync &MPI_TraceSync::operator<<(const T &v) +{ + _buffer << v; + return *this; +} + +class Logger { + public: + friend class MPI_TraceSync; + explicit Logger(std::string prefix, int comm_rank, int comm_size, const MPI_Comm& comm); + + /// error > info > warn > debug > trace + std::ostream& error(bool append=false); + /// error > info > warn > debug > trace + std::ostream& info(bool append=false); + /// error > info > warn > debug > trace + std::ostream& warn(bool append=false); + /// error > info > warn > debug > trace + std::ostream& debug(bool append=false); + /// error > info > warn > debug > trace + MPI_TraceSync trace(bool append=false); + /// progress bar + void progress(const std::string& prefix, int step, int max); + + private: + std::ostream& trace_impl(bool append=false); + /// starting time + std::chrono::steady_clock::time_point _start_time; + /// what the logger should always print first + const std::string _prefix; + /// empty stream to write nothing + std::ofstream _nullstr; + /// MPI comm rank + int _comm_rank; + /// MPI comm size + int _comm_size; + /// communicator + MPI_Comm _comm; +}; + +/** + * Creates all loggers and sets 
the level + * */ +void init(int comm_rank, int comm_size, const MPI_Comm& comm); + +/** + * Frees all memory + * */ +void finalize(); + +/** + * Set activate rank + */ +[[maybe_unused]] void setActiveRank(int rank); + +/// logger with prefix [GENERAL] +extern std::unique_ptr general; +/// logger with prefix [SOLVER] +extern std::unique_ptr solver; +/// logger with prefix [IO] +extern std::unique_ptr io; + +}; // namespace Log + +#endif \ No newline at end of file diff --git a/include/material_models/J2Plasticity.h b/include/material_models/J2Plasticity.h index 33f2a69..e657a20 100644 --- a/include/material_models/J2Plasticity.h +++ b/include/material_models/J2Plasticity.h @@ -55,6 +55,17 @@ class J2Plasticity : public SmallStrainMechModel { psi_bar_t.resize(num_elements, Matrix::Zero(6, num_gauss_points)); } + void register_serialization(registry_t &r) override + { + SmallStrainMechModel::register_serialization(r); + for (auto &mat : plasticStrain_t) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &mat : plasticStrain) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &mat : psi_bar_t) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &mat : psi_bar) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &vec : psi_t) r.emplace_back(std::data(vec), (vec.size()) * sizeof(double), true); + for (auto &vec : psi) r.emplace_back(std::data(vec), (vec.size()) * sizeof(double), true); + } + virtual void updateInternalVariables() override { plasticStrain_t = plasticStrain; diff --git a/include/material_models/J2PlasticityNew.h b/include/material_models/J2PlasticityNew.h index e62996e..3f0e26c 100644 --- a/include/material_models/J2PlasticityNew.h +++ b/include/material_models/J2PlasticityNew.h @@ -34,6 +34,15 @@ class J2PlasticityNew : public SmallStrainMechModel { q_t.resize(num_elements, VectorXd::Zero(num_gauss_points)); } + void register_serialization(registry_t &r) override + { + SmallStrainMechModel::register_serialization(r); + for (auto &mat : plasticStrain_t) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &mat : plasticStrain) r.emplace_back(std::data(mat), (mat.size()) * sizeof(double), true); + for (auto &vec : q_t) r.emplace_back(std::data(vec), (vec.size()) * sizeof(double), true); + for (auto &vec : q) r.emplace_back(std::data(vec), (vec.size()) * sizeof(double), true); + } + virtual void updateInternalVariables() override { plasticStrain_t = plasticStrain; diff --git a/include/matmodel.h b/include/matmodel.h index bd3a82d..ffdb1bc 100644 --- a/include/matmodel.h +++ b/include/matmodel.h @@ -3,12 +3,13 @@ #define MATMODEL_H #include "general.h" +#include "serialization.h" template class Solver; template -class Matmodel { +class Matmodel : public Serializable { public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW // see http://eigen.tuxfamily.org/dox-devel/group__TopicStructHavingEigenMembers.html @@ -27,7 +28,7 @@ class Matmodel { Matrix Compute_Reference_ElementStiffness(const Matrix &kapparef_mat); Matrix &element_residual(Matrix &ue, int mat_index, ptrdiff_t element_idx); void getStrainStress(double *strain, double *stress, Matrix &ue, int mat_index, ptrdiff_t element_idx); - void setGradient(vector _g0); + void setGradient(const vector &_g0); // Accessors for internal Gauss point data (populated after getStrainStress call) inline const double *get_eps_data() const @@ -53,6 +54,9 @@ class Matmodel { virtual ~Matmodel() = default; + void 
register_serialization(registry_t &r) override; + void init_deserialization() override; + protected: double l_e_x; double l_e_y; @@ -101,6 +105,37 @@ Matmodel::Matmodel(const Reader &reader) sigma.resize(n_str * n_gp); } +template +void Matmodel::register_serialization(registry_t &r) +{ + //r.emplace_back(&verbosity, sizeof(int), false); + //r.emplace_back(&n_mat, sizeof(int), false); + //r.emplace_back(&n_gp, sizeof(int), false); + + //r.emplace_back(std::data(FE_type), (FE_type.size() + 1) * sizeof(char), true); + //r.emplace_back(std::data(macroscale_loading), macroscale_loading.size() * sizeof(double), true); + + //r.emplace_back(&l_e_x, sizeof(double), false); + //r.emplace_back(&l_e_y, sizeof(double), false); + //r.emplace_back(&l_e_z, sizeof(double), false); + //r.emplace_back(&v_e, sizeof(double), false); + + //for (auto& mat : B_int) r.emplace_back(std::data(mat), mat.size() * sizeof(double), true); + + r.emplace_back(std::data(B), B.size() * sizeof(double), true); + r.emplace_back(std::data(eps), eps.size() * sizeof(double), true); + r.emplace_back(std::data(g0), g0.size() * sizeof(double), true); + r.emplace_back(std::data(sigma), sigma.size() * sizeof(double), true); + r.emplace_back(std::data(res_e), res_e.size() * sizeof(double), true); +} + +template +void Matmodel::init_deserialization() +{ + // need to allocate enough memory first + //FE_type = std::string("\0\0\0\0\0\0"); +} + template void Matmodel::Construct_B() { @@ -225,11 +260,13 @@ void Matmodel::getStrainStress(double *strain, double *stress, M } template -void Matmodel::setGradient(vector _g0) +void Matmodel::setGradient(const vector &_g0) { + // _g0 is smaller than g0, can stay in cache + // if j loop inner, then not using cache-lines well macroscale_loading = _g0; - for (int i = 0; i < n_str; i++) { - for (int j = 0; j < n_gp; ++j) { + for (int j = 0; j < n_gp; ++j) { + for (int i = 0; i < n_str; i++) { g0(n_str * j + i, 0) = _g0[i]; } } diff --git a/include/reader.h b/include/reader.h index 1e62249..e720704 100644 --- a/include/reader.h +++ b/include/reader.h @@ -11,16 +11,29 @@ #include "H5FDmpio.h" #include "mpi.h" +#include "serialization.h" + using namespace std; -class Reader { +class Reader : public Serializable { public: - // Default constructor - Reader() = default; + // Explicit default constructor + Reader(): + force_single_rank(false), communicator(nullptr), n_mat(0), + TOL(0), n_it(0), world_rank(0), world_size(0), alloc_local(0), + local_n0(0), local_0_start(0), local_n1(0), local_1_start(0) + { + manual_serialize = true; + manual_deserialize = true; + } // Destructor to free allocated memory ~Reader(); + // MPI controlls + bool force_single_rank; + MPI_Comm communicator; + // contents of input file: char ms_filename[4096]{}; // Name of Micro-structure hdf5 file char ms_datasetname[4096]{}; // Absolute path of Micro-structure in hdf5 file @@ -61,8 +74,11 @@ class Reader { // void Setup(ptrdiff_t howmany); void ReadInputFile(char input_fn[]); + void ReadJson(json j); void ReadMS(int hm); - void ComputeVolumeFractions(); + void ComputeVolumeFractions(); + length_t serialize_override(buffer_t &buffer, length_t offset) override; + length_t deserialize_override(buffer_t &buffer, length_t offset) override; // void ReadHDF5(char file_name[], char dset_name[]); void safe_create_group(hid_t file, const char *const name); void OpenResultsFile(const char *output_fn); // Open results file once diff --git a/include/serialization.h b/include/serialization.h new file mode 100644 index 0000000..4771241 --- 
/dev/null +++ b/include/serialization.h @@ -0,0 +1,164 @@ +// +// Created by Alex Hocks on 09/01/26. +// + +#ifndef SERIALIZATION_H +#define SERIALIZATION_H +#include "logging.h" + +#include +#include +#include + +template, typename r = std::conditional_t> +class SerializationBuffer : public Base { + // Buffer always has size multiple of 4 byte +public: + using Base::Base; + using size_type = typename Base::size_type; + void resize(size_type new_size) + { + const auto num_bytes = new_size * data_size; + const auto delta_bytes = (4 - (num_bytes % 4)) % 4; + new_size += delta_bytes / data_size; + Base::resize(new_size); + } + + void resize(size_type new_size, const T& value) + { + const auto num_bytes = new_size * data_size; + const auto delta_bytes = (4 - (num_bytes % 4)) % 4; + new_size += delta_bytes / data_size; + Base::resize(new_size, value); + } + + void add_size(size_type delta) { resize(Base::size() + delta); } + + std::vector& as_int_vector() { return *reinterpret_cast*>(this); } + + static SerializationBuffer& from_int_vector(std::vector& vec) { return *reinterpret_cast(&vec); } + +private: + static constexpr auto data_size = sizeof(T); +}; + +class Serializable { + /** + * Serializes any object that implements this. + * Buffer elements will be bytes, but total count will always be padded to be multiple of 4. + * Data is run length encoded. + */ +public: + // run length type + using length_t = uint64_t; + // run length block size + static constexpr auto header_len = sizeof(length_t); + // byte data type + using data_t = uint8_t; + // buffer type + using buffer_t = SerializationBuffer; + // registry type: pointer to data, data size, true: run length encoded, false: direct read write + using reg_entry_t = std::tuple; + using registry_t = std::vector; + + virtual ~Serializable() = default; + + /** + * Serializes this object + * @return serial buffer + */ + buffer_t serialize_full() + { + if (manual_deserialize) { + buffer_t buffer; + serialize_override(buffer, 0); + return buffer; + } + + Log::io->trace() << "Serialization::serialize_full() - register_serialization\n"; + registry_t registry; + register_serialization(registry); + + Log::io->trace() << "Serialization::serialize_full() - accumulate\n"; + const length_t total_size = std::accumulate(registry.begin(), registry.end(), 0UL, + [](const length_t acc, const reg_entry_t &e) { + const bool use_run_length = std::get<2>(e); + length_t size = std::get<1>(e); + if (use_run_length) size += header_len; + return acc + size; + }); + + buffer_t buffer; + buffer.resize(total_size); + Log::io->trace() << "Serialization::serialize_full() - buffer size=" << total_size << " addr=" << static_cast(std::data(buffer)) << "\n"; + + Log::io->trace() << "Serialization::serialize_full() - process entries\n"; + length_t offset = 0UL; + length_t pos = 0UL; + for (const reg_entry_t &e : registry) { + const bool use_run_length = std::get<2>(e); + length_t size = std::get<1>(e); + void* data = std::get<0>(e); + Log::io->trace() << "Serialization::serialize_full() - entry: pos=" << pos << " addr=" << data << " offset=" << offset << " useRL=" << use_run_length << " size=" << size << "\n"; + + if (use_run_length) { + *reinterpret_cast(std::data(buffer) + offset) = size; + offset += header_len; + } + std::memcpy(std::data(buffer) + offset, data, size); + offset += size; + pos++; + } + + Log::io->trace() << "Serialization::serialize_full() - done\n"; + return buffer; + } + + virtual void register_serialization(registry_t &r) {} + + virtual void 
init_deserialization() {} + + virtual registry_t::iterator late_init_deserialization(registry_t &r, registry_t::iterator begin) { return r.begin(); } + + bool manual_deserialize = false; + bool manual_serialize = false; + + /** + * Deserialize this object using provided data + * @param buffer + */ + void deserialize_full(buffer_t &buffer) + { + if (manual_deserialize) { + deserialize_override(buffer, 0); + return; + } + + init_deserialization(); // alloc mem if needed + + // gather serialization description + registry_t registry; + register_serialization(registry); + // place correct lengths into descriptor list, etc + late_init_deserialization(registry, registry.begin()); + + length_t offset = 0UL; + for (reg_entry_t &e : registry) { + const bool use_run_length = std::get<2>(e); + length_t &size = std::get<1>(e); + void* data = std::get<0>(e); + + if (use_run_length) { + size = *reinterpret_cast(std::data(buffer) + offset); + offset += header_len; + } + std::memcpy(data, std::data(buffer) + offset, size); + offset += size; + } + } + + virtual length_t deserialize_override(buffer_t &buffer, length_t offset) { return offset; } + virtual length_t serialize_override(buffer_t &buffer, length_t offset) { return offset; } +}; + +#endif diff --git a/include/solver.h b/include/solver.h index cb05d49..6cd7c37 100644 --- a/include/solver.h +++ b/include/solver.h @@ -3,21 +3,24 @@ #include "matmodel.h" #include "MaterialManager.h" +#include "logging.h" +#include "serialization.h" class J2Plasticity; typedef Map, Unaligned, OuterStride<>> RealArray; template -class Solver : private MixedBCController { +class Solver : MixedBCController, public Serializable { public: - Solver(Reader &reader, MaterialManager *matmanager); + explicit Solver(Reader &reader, MaterialManager *matmanager=nullptr); virtual ~Solver(); Reader &reader; const int world_rank; const int world_size; + MPI_Comm communicator; const ptrdiff_t n_x, n_y, n_z; // NOTE: the order in the declaration is very important because it is the same order in which the later initialization via member initializer lists takes place @@ -43,6 +46,10 @@ class Solver : private MixedBCController { ArrayXd err_all; //!< Absolute error history Matrix fundamentalSolution; + void init_fundamentalSolutionBuffer() + { + fundamentalSolution = Matrix(howmany, (local_n1 * n_x * (n_z / 2 + 1) * (howmany + 1)) / 2); + } template void iterateCubes(F f); @@ -62,6 +69,8 @@ class Solver : private MixedBCController { double compute_error(RealArray &r); void CreateFFTWPlans(double *in, fftw_complex *transformed, double *out); + void register_serialization(registry_t &r) override; + VectorXd homogenized_stress; VectorXd get_homogenized_stress(); @@ -108,6 +117,7 @@ Solver::Solver(Reader &reader, MaterialManager * matmanager(matmgr), world_rank(reader.world_rank), world_size(reader.world_size), + communicator(reader.communicator), n_x(reader.dims[0]), n_y(reader.dims[1]), n_z(reader.dims[2]), @@ -130,23 +140,26 @@ Solver::Solver(Reader &reader, MaterialManager * rhat((std::complex *) v_r, local_n1 * n_x * (n_z / 2 + 1) * howmany), // actual initialization is below buffer_padding(fftw_alloc_real(n_y * (n_z + 2) * howmany)) { + Log::solver->trace() << "Start constructing solver\n"; + v_u_real.setZero(); for (ptrdiff_t i = local_n0 * n_y * n_z * howmany; i < (local_n0 + 1) * n_y * n_z * howmany; i++) { this->v_u[i] = 0; } std::memset(v_u_prev, 0, local_n0 * n_y * n_z * howmany * sizeof(double)); - matmanager->initialize_internal_variables(local_n0 * n_y * n_z, 
matmanager->models[0]->n_gp); - - computeFundamentalSolution(); + if (matmanager != nullptr) { + Log::solver->trace() << "Init internal vars.\n"; + matmanager->initialize_internal_variables(local_n0 * n_y * n_z, matmanager->models[0]->n_gp); + computeFundamentalSolution(); + } } template void Solver::computeFundamentalSolution() { - if (world_rank == 0) { - printf("\n# Start creating Fundamental Solution(s) \n"); - } + Log::solver->info() << "\n# Start creating Fundamental Solution(s) \n"; + clock_t tot_time = clock(); Matrix Ker0 = matmanager->models[0]->Compute_Reference_ElementStiffness(matmanager->kapparef_mat); @@ -160,7 +173,7 @@ void Solver::computeFundamentalSolution() Matrix, 8, 1> A; Matrix AA; Matrix block; - fundamentalSolution = Matrix(howmany, (local_n1 * n_x * (n_z / 2 + 1) * (howmany + 1)) / 2); + init_fundamentalSolutionBuffer(); fundamentalSolution.setZero(); for (int i_y = 0; i_y < local_n1; ++i_y) { @@ -198,14 +211,13 @@ void Solver::computeFundamentalSolution() fundamentalSolution /= (double) (n_x * n_y * n_z); tot_time = clock() - tot_time; - if (world_rank == 0) { - printf("# Complete; Time for construction of Fundamental Solution(s): %f seconds\n", double(tot_time) / CLOCKS_PER_SEC); - } + Log::solver->info() << "# Complete; Time for construction of Fundamental Solution(s): " << static_cast(tot_time) / CLOCKS_PER_SEC << " seconds\n"; } template void Solver::CreateFFTWPlans(double *in, fftw_complex *transformed, double *out) { + Log::solver->trace() << "Solver::CreateFFTWPlans() begin \n"; int rank = 3; ptrdiff_t iblock = FFTW_MPI_DEFAULT_BLOCK; ptrdiff_t oblock = FFTW_MPI_DEFAULT_BLOCK; @@ -218,13 +230,28 @@ void Solver::CreateFFTWPlans(double *in, fftw_complex *transform // But, according to https://fftw.org/doc/MPI-Plan-Creation.html the BLOCK sizes must be the same: // "These must be the same block sizes as were passed to the corresponding ‘local_size’ function" const ptrdiff_t n[3] = {n_x, n_y, n_z}; - planfft = fftw_mpi_plan_many_dft_r2c(rank, n, howmany, iblock, oblock, in, transformed, MPI_COMM_WORLD, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_OUT); - planifft = fftw_mpi_plan_many_dft_c2r(rank, n, howmany, iblock, oblock, transformed, out, MPI_COMM_WORLD, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_IN); + planfft = fftw_mpi_plan_many_dft_r2c(rank, n, howmany, iblock, oblock, in, transformed, communicator, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_OUT); + planifft = fftw_mpi_plan_many_dft_c2r(rank, n, howmany, iblock, oblock, transformed, out, communicator, FFTW_MEASURE | FFTW_MPI_TRANSPOSED_IN); // see https://eigen.tuxfamily.org/dox/group__TutorialMapClass.html#title3 new (&rhat) Map((std::complex *) transformed, local_n1 * n_x * (n_z / 2 + 1) * howmany); } +template +void Solver::register_serialization(registry_t &r) +{ + Log::io->trace() << "Solver::register_serialization v_r size=" << std::max(reader.alloc_local * 2, (local_n0 + 1) * n_y * (n_z + 2) * howmany) * sizeof(double) << "\n"; + r.emplace_back(v_r, std::max(reader.alloc_local * 2, (local_n0 + 1) * n_y * (n_z + 2) * howmany) * sizeof(double), true); + Log::io->trace() << "Solver::register_serialization v_u size=" << (local_n0 * n_y * n_z * howmany) * sizeof(double) << "\n"; + r.emplace_back(v_u, (local_n0 * n_y * n_z * howmany) * sizeof(double), true); + Log::io->trace() << "Solver::register_serialization v_u_prev size=" << (local_n0 * n_y * n_z * howmany) * sizeof(double) << "\n"; + r.emplace_back(v_u_prev, (local_n0 * n_y * n_z * howmany) * sizeof(double), true); + Log::io->trace() << "Solver::register_serialization 
rhat size=" << (local_n1 * n_x * (n_z / 2 + 1) * howmany) * sizeof(std::complex) << "\n"; + r.emplace_back(std::data(rhat), (local_n1 * n_x * (n_z / 2 + 1) * howmany) * sizeof(std::complex), true); + Log::io->trace() << "Solver::register_serialization fundamentalSolution size=" << ((local_n1 * n_x * (n_z / 2 + 1) * (howmany + 1)) / 2) * sizeof(double) << "\n"; + r.emplace_back(std::data(fundamentalSolution), ((local_n1 * n_x * (n_z / 2 + 1) * (howmany + 1)) / 2) * sizeof(double), true); +} + // TODO: possibly circumvent the padding problem by accessing r as a matrix? template template @@ -242,7 +269,7 @@ void Solver::compute_residual_basic(RealArray &r_matrix, RealArr // int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, // int recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status) MPI_Sendrecv(u, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, - u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, communicator, MPI_STATUS_IGNORE); Matrix ue; @@ -262,7 +289,7 @@ void Solver::compute_residual_basic(RealArray &r_matrix, RealArr }); MPI_Sendrecv(r + local_n0 * n_y * (n_z + padding) * howmany, n_y * (n_z + padding) * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, - buffer_padding, n_y * (n_z + padding) * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + buffer_padding, n_y * (n_z + padding) * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, communicator, MPI_STATUS_IGNORE); RealArray b(buffer_padding, n_z * howmany, n_y, OuterStride<>((n_z + padding) * howmany)); // NOTE: for any padding of more than 2, the buffer_padding has to be extended @@ -282,20 +309,18 @@ void Solver::compute_residual(RealArray &r_matrix, RealArray &u_ template void Solver::solve() { - err_all = ArrayXd::Zero(n_it + 1); fft_time = 0.0; clock_t tot_time = clock(); internalSolve(); tot_time = clock() - tot_time; - // if( VERBOSITY > 5 ){ - if (world_rank == 0) { - printf("# FFT Time per iteration ....... %2.6f sec\n", double(fft_time) / CLOCKS_PER_SEC / iter); - printf("# Total FFT Time ............... %2.6f sec\n", double(fft_time) / CLOCKS_PER_SEC); - printf("# Total Time per iteration ..... %2.6f sec\n", double(tot_time) / CLOCKS_PER_SEC / iter); - printf("# Total Time ................... %2.6f sec\n", double(tot_time) / CLOCKS_PER_SEC); - printf("# FFT contribution to total time %2.6f %% \n", 100. * double(fft_time) / double(tot_time)); - } + + Log::solver->info() << "# FFT Time per iteration ....... " << std::setw(2) << std::setprecision(6) << double(fft_time) / CLOCKS_PER_SEC / iter << std::defaultfloat << " sec\n"; + Log::solver->info() << "# Total FFT Time ............... " << std::setw(2) << std::setprecision(6) << double(fft_time) / CLOCKS_PER_SEC << std::defaultfloat << " sec\n"; + Log::solver->info() << "# Total Time per iteration ..... " << std::setw(2) << std::setprecision(6) << double(tot_time) / CLOCKS_PER_SEC / iter << std::defaultfloat << " sec\n"; + Log::solver->info() << "# Total Time ................... 
" << std::setw(2) << std::setprecision(6) << double(tot_time) / CLOCKS_PER_SEC << std::defaultfloat << " sec\n"; + Log::solver->info() << "# FFT contribution to total time " << std::setw(2) << std::setprecision(6) << 100. * double(fft_time) / double(tot_time) << std::defaultfloat << " % \n"; + matmanager->update_internal_variables(); } @@ -427,19 +452,18 @@ double Solver::compute_error(RealArray &r) } double err; - MPI_Allreduce(&err_local, &err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&err_local, &err, 1, MPI_DOUBLE, MPI_MAX, communicator); err_all[iter] = err; double err0 = err_all[0]; double err_rel = (iter == 0 ? 100 : err / err0); - if (world_rank == 0) { - if (iter == 0) { - printf("Before 1st iteration: %16.8e\n", err0); - } else { - printf("it %3lu .... err %16.8e / %8.4e, ratio: %4.8e, FFT time: %2.6f sec\n", iter, err, err / err0, (iter == 1 ? 0.0 : err / err_all[iter - 1]), double(buftime) / CLOCKS_PER_SEC); - } - } + if (iter == 0) Log::solver->info() << "Before 1st iteration: " << std::setw(16) << std::setprecision(8) << err0 << std::defaultfloat << "\n"; + else Log::solver->info() << "it " << iter << " .... " + << "err " << std::setw(16) << std::setprecision(8) << err << std::defaultfloat << " / " + << std::setw( 8) << std::setprecision(4) << err / err0 << std::defaultfloat + << ", ratio: " << std::setw(4) << std::setprecision(8) << (iter == 1 ? 0.0 : err / err_all[iter - 1]) << std::defaultfloat + << ", FFT time: " << std::setw(2) << std::setprecision(6) << static_cast(buftime) / CLOCKS_PER_SEC << std::defaultfloat << " sec\n"; const std::string &error_type = reader.errorParameters["type"].get(); if (error_type == "absolute") { @@ -495,7 +519,7 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ vector phase_counts(n_mat, 0); MPI_Sendrecv(v_u, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, - v_u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + v_u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, communicator, MPI_STATUS_IGNORE); Matrix ue; int phase_id; @@ -555,16 +579,16 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ }); } - MPI_Allreduce(MPI_IN_PLACE, stress_average.data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, strain_average.data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, stress_average.data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); + MPI_Allreduce(MPI_IN_PLACE, strain_average.data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); stress_average /= (n_x * n_y * n_z); strain_average /= (n_x * n_y * n_z); // Reduce per-phase accumulations across all processes for (int mat_index = 0; mat_index < n_mat; ++mat_index) { - MPI_Allreduce(MPI_IN_PLACE, phase_stress_average[mat_index].data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, phase_strain_average[mat_index].data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE, &phase_counts[mat_index], 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, phase_stress_average[mat_index].data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); + MPI_Allreduce(MPI_IN_PLACE, phase_strain_average[mat_index].data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); + MPI_Allreduce(MPI_IN_PLACE, &phase_counts[mat_index], 1, MPI_INT, MPI_SUM, communicator); // Compute average for each phase 
if (phase_counts[mat_index] > 0) { @@ -573,16 +597,12 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ } } - if (world_rank == 0) { - printf("# Effective Stress .. ("); - for (int i = 0; i < n_str; ++i) - printf("%+.12f ", stress_average[i]); - printf(") \n"); - printf("# Effective Strain .. ("); - for (int i = 0; i < n_str; ++i) - printf("%+.12f ", strain_average[i]); - printf(") \n\n"); - } + Log::solver->info() << "# Effective Stress .. ("; + for (int i = 0; i < n_str; ++i) Log::solver->info(true) << std::showpos << std::fixed << std::setprecision(12) << stress_average[i] << " " << std::noshowpos << std::defaultfloat; + Log::solver->info(true) << ") \n"; + Log::solver->info() << "# Effective Strain .. ("; + for (int i = 0; i < n_str; ++i) Log::solver->info(true) << std::showpos << std::fixed << std::setprecision(12) << strain_average[i] << " " << std::noshowpos << std::defaultfloat; + Log::solver->info(true) << ") \n\n"; /* ====================================================================== * * u_total = g0·X + ũ (vector or scalar, decided at compile time) @@ -695,11 +715,7 @@ void Solver::postprocess(Reader &reader, int load_idx, int time_ if (find(reader.resultsToWrite.begin(), reader.resultsToWrite.end(), "homogenized_tangent") != reader.resultsToWrite.end()) { homogenized_tangent = get_homogenized_tangent(1e-6); hsize_t dims[2] = {static_cast(n_str), static_cast(n_str)}; - if (world_rank == 0) { - cout << "# Homogenized tangent: " << endl - << setprecision(12) << homogenized_tangent << endl - << endl; - } + Log::solver->info() << "# Homogenized tangent: \n" << std::setprecision(12) << homogenized_tangent << std::defaultfloat << "\n\n"; reader.writeData("homogenized_tangent", load_idx, time_idx, homogenized_tangent.data(), dims, 2); } extrapolateDisplacement(); // prepare v_u for next time step @@ -714,7 +730,7 @@ VectorXd Solver::get_homogenized_stress() homogenized_stress = VectorXd::Zero(n_str); MPI_Sendrecv(v_u, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + world_size - 1) % world_size, 0, - v_u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + v_u + local_n0 * n_y * n_z * howmany, n_y * n_z * howmany, MPI_DOUBLE, (world_rank + 1) % world_size, 0, communicator, MPI_STATUS_IGNORE); Matrix ue; int phase_id; @@ -731,7 +747,7 @@ VectorXd Solver::get_homogenized_stress() homogenized_stress += stress.segment(n_str * idx[0], n_str); }); - MPI_Allreduce(MPI_IN_PLACE, homogenized_stress.data(), n_str, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, homogenized_stress.data(), n_str, MPI_DOUBLE, MPI_SUM, communicator); homogenized_stress /= (n_x * n_y * n_z); return homogenized_stress; diff --git a/include/solverCG.h b/include/solverCG.h index b02a428..0fcdf59 100644 --- a/include/solverCG.h +++ b/include/solverCG.h @@ -53,15 +53,14 @@ double SolverCG::dotProduct(RealArray &a, RealArray &b) { double local_value = (a * b).sum(); double result; - MPI_Allreduce(&local_value, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&local_value, &result, 1, MPI_DOUBLE, MPI_SUM, this->communicator); return result; } template void SolverCG::internalSolve() { - if (this->world_rank == 0) - printf("\n# Start FANS - Conjugate Gradient Solver \n"); + Log::solver->info() << "\n# Start FANS - Conjugate Gradient Solver \n"; bool islinear = this->matmanager->all_linear; @@ -110,8 +109,8 @@ void SolverCG::internalSolve() iter++; err_rel = 
this->compute_error(v_r_real); } - if (this->world_rank == 0) - printf("# Complete FANS - Conjugate Gradient Solver \n"); + + Log::solver->info() << "# Complete FANS - Conjugate Gradient Solver \n"; } template @@ -142,8 +141,10 @@ void SolverCG::LineSearchSecant() } v_u_real += d_real * (alpha_new - alpha_old); v_r_real = rnew_real; - if (this->world_rank == 0) - printf("line search iter %i, alpha %f - error %e - ", _iter, alpha_new, err); + + Log::solver->info() << "line search iter " << _iter + << ", alpha " << std::fixed << std::setprecision(6) << alpha_new << std::defaultfloat + << " - error " << std::fixed << std::setprecision(6) << err << std::defaultfloat << " - "; } template diff --git a/include/solverFP.h b/include/solverFP.h index 67b99a4..6a00691 100644 --- a/include/solverFP.h +++ b/include/solverFP.h @@ -32,8 +32,7 @@ SolverFP::SolverFP(Reader &reader, MaterialManager void SolverFP::internalSolve() { - if (this->world_rank == 0) - printf("\n# Start FANS - Fixed Point Solver \n"); + Log::solver->info() << "\n# Start FANS - Fixed Point Solver \n"; this->template compute_residual<2>(v_r_real, v_u_real); @@ -50,7 +49,7 @@ void SolverFP::internalSolve() iter++; err_rel = this->compute_error(v_r_real); } - if (this->world_rank == 0) - printf("# Complete FANS - Fixed Point Solver \n"); + + Log::solver->info() << "# Complete FANS - Fixed Point Solver \n"; } #endif diff --git a/pixi.lock b/pixi.lock index a1b4345..1c1db95 100644 --- a/pixi.lock +++ b/pixi.lock @@ -2253,7 +2253,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstandard-0.25.0-py314h2e8dab5_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-hbcf94c1_2.conda - conda: . - subdir: linux-aarch64 + build: he8cfe8b_0 osx-64: - conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/aiobotocore-2.25.2-pyhcf101f3_0.conda @@ -2600,7 +2600,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/zstandard-0.25.0-py314hd1e8ddb_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.7-h8210216_2.conda - conda: . - subdir: osx-64 + build: h0dc7051_0 osx-arm64: - conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/aiobotocore-2.25.2-pyhcf101f3_0.conda @@ -6125,63 +6125,55 @@ packages: - conda: . name: fans version: 0.6.1 - build: h1235946_0 - subdir: linux-64 + build: h0dc7051_0 + subdir: osx-64 variants: - cxx_compiler: clangxx - target_platform: linux-64 + target_platform: osx-64 depends: - - libstdcxx >=15 - - libgcc >=15 + - libcxx >=21 - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_* - fftw >=3.3.10,<4.0a0 - fftw * mpi_openmpi_* - input: - hash: 16dd9c1131985cbcdb3bb37c6672fa20ea368188987b8d42da1b2ebd02bd9fbf - globs: [] - conda: . name: fans version: 0.6.1 - build: hbf21a9e_0 - subdir: linux-aarch64 + build: h1235946_0 + subdir: linux-64 + variants: + cxx_compiler: clangxx + target_platform: linux-64 depends: - libstdcxx >=15 - libgcc >=15 - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_* - fftw >=3.3.10,<4.0a0 - fftw * mpi_openmpi_* - input: - hash: 8ad54598e221efef736cec65db9e327856f556e55f81445cda5620e181ef8813 - globs: [] - conda: . 
name: fans version: 0.6.1 - build: hbf21a9e_0 - subdir: osx-64 + build: hcb8d3e5_0 + subdir: osx-arm64 + variants: + cxx_compiler: clangxx + target_platform: osx-arm64 depends: - libcxx >=21 - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_* - fftw >=3.3.10,<4.0a0 - fftw * mpi_openmpi_* - input: - hash: 8ad54598e221efef736cec65db9e327856f556e55f81445cda5620e181ef8813 - globs: [] - conda: . name: fans version: 0.6.1 - build: hcb8d3e5_0 - subdir: osx-arm64 + build: he8cfe8b_0 + subdir: linux-aarch64 variants: - cxx_compiler: clangxx - target_platform: osx-arm64 + target_platform: linux-aarch64 depends: - - libcxx >=21 + - libstdcxx >=15 + - libgcc >=15 - hdf5 >=1.14.6,<1.14.7.0a0 mpi_openmpi_* - fftw >=3.3.10,<4.0a0 - fftw * mpi_openmpi_* - input: - hash: 16dd9c1131985cbcdb3bb37c6672fa20ea368188987b8d42da1b2ebd02bd9fbf - globs: [] - conda: https://conda.anaconda.org/conda-forge/linux-64/fftw-3.3.10-mpi_openmpi_h99e62ba_10.conda sha256: 59a1fd0daa71bd5529e19b4da8aae2ded4d4ef05e5e31d80c39cbe2fc7664b6a md5: 3fcbf2ef5f3a8f0ed5683f60fe08293b diff --git a/pyfans/micro.cpp b/pyfans/micro.cpp index b9f3636..4e82f22 100644 --- a/pyfans/micro.cpp +++ b/pyfans/micro.cpp @@ -6,6 +6,7 @@ #include "micro.hpp" #include "setup.h" #include "matmodel.h" +#include "mpi.h" py::array_t merge_arrays(py::array_t array1, py::array_t array2) { @@ -24,21 +25,34 @@ py::array_t merge_arrays(py::array_t array1, py::array_t return result; } -MicroSimulation::MicroSimulation(int sim_id, char *input_file) +MicroSimulation::MicroSimulation(int sim_id, bool late_init, char *input_file) : _sim_id(sim_id) { // initialize fftw mpi fftw_mpi_init(); - // Input file name is hardcoded. TODO: Make it configurable - reader.ReadInputFile(input_file); + if (not late_init || sim_id >= 0) { + // Input file name is hardcoded. TODO: Make it configurable + reader.ReadInputFile(input_file); + reader.ReadMS(3); + + if (reader.strain_type == "small") { + matmanager = createMaterialManager<3, 6>(reader); + solver = createSolver<3, 6>(reader, std::get *>(matmanager)); + } else { + matmanager = createMaterialManager<3, 9>(reader); + solver = createSolver<3, 9>(reader, std::get *>(matmanager)); + } + } +} - reader.ReadMS(3); - matmanager = createMaterialManager<3, 6>(reader); - solver = createSolver<3, 6>(reader, matmanager); +MicroSimulation::~MicroSimulation() +{ + Log::finalize(); } py::dict MicroSimulation::solve(py::dict macro_data, double dt) { + const bool is_small_strain = std::holds_alternative *>(matmanager); // Time step value dt is not used currently, but is available for future use // Create a pybind style Numpy array from macro_write_data["micro_vector_data"], which is a Numpy array @@ -46,44 +60,115 @@ py::dict MicroSimulation::solve(py::dict macro_data, double dt) py::array_t strain2 = macro_data["strains4to6"].cast>(); py::array_t strain = merge_arrays(strain1, strain2); - std::vector g0 = std::vector(strain.data(), strain.data() + strain.size()); // convert numpy array to std::vector. + if (not is_small_strain) { + py::array_t strain3 = macro_data["strains7to9"].cast>(); + strain = merge_arrays(strain, strain3); + } + + std::vector g0 = std::vector(strain.data(), strain.data() + strain.size()); // convert numpy array to std::vector. 
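// --- Hedged reviewer sketch, not part of the original patch ---
// With the finite-strain path enabled, g0 is expected to carry 9 components
// (deformation gradient) rather than the 6 Voigt components of the small-strain
// path. A cheap guard one could add at this point, assuming those counts hold:
//
//     const std::size_t expected_components = is_small_strain ? 6u : 9u;
//     if (g0.size() != expected_components)
//         throw std::runtime_error("pyFANS: unexpected number of macro strain/deformation components");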
VectorXd homogenized_stress; - matmanager->set_gradient(g0); + std::visit([&](auto &mm) { mm->set_gradient(g0); }, matmanager); - solver->solve(); + std::visit([](auto &s) { s->solve(); }, solver); - homogenized_stress = solver->get_homogenized_stress(); + homogenized_stress = std::visit([](auto &s) -> VectorXd { return s->get_homogenized_stress(); }, solver); - auto C = solver->get_homogenized_tangent(pert_param); + MatrixXd C = std::visit([&](auto &s) -> MatrixXd { return s->get_homogenized_tangent(pert_param); }, solver); // Convert data to a py::dict again to send it back to the Micro Manager py::dict micro_write_data; // Add stress and stiffness matrix data to Python dict to be returned - std::vector stress13 = {homogenized_stress[0], homogenized_stress[1], homogenized_stress[2]}; - micro_write_data["stresses1to3"] = stress13; - std::vector stress46 = {homogenized_stress[3], homogenized_stress[4], homogenized_stress[5]}; - micro_write_data["stresses4to6"] = stress46; - std::vector C_1 = {C(0, 0), C(0, 1), C(0, 2)}; - micro_write_data["cmat1"] = C_1; - std::vector C_2 = {C(0, 3), C(0, 4), C(0, 5)}; - micro_write_data["cmat2"] = C_2; - std::vector C_3 = {C(1, 1), C(1, 2), C(1, 3)}; - micro_write_data["cmat3"] = C_3; - std::vector C_4 = {C(1, 4), C(1, 5), C(2, 2)}; - micro_write_data["cmat4"] = C_4; - std::vector C_5 = {C(2, 3), C(2, 4), C(2, 5)}; - micro_write_data["cmat5"] = C_5; - std::vector C_6 = {C(3, 3), C(3, 4), C(3, 5)}; - micro_write_data["cmat6"] = C_6; - std::vector C_7 = {C(4, 4), C(4, 5), C(5, 5)}; - micro_write_data["cmat7"] = C_7; + if (is_small_strain) { + std::vector stress13 = {homogenized_stress[0], homogenized_stress[1], homogenized_stress[2]}; + micro_write_data["stresses1to3"] = stress13; + std::vector stress46 = {homogenized_stress[3], homogenized_stress[4], homogenized_stress[5]}; + micro_write_data["stresses4to6"] = stress46; + std::vector C_1 = {C(0, 0), C(0, 1), C(0, 2)}; + micro_write_data["cmat1"] = C_1; + std::vector C_2 = {C(0, 3), C(0, 4), C(0, 5)}; + micro_write_data["cmat2"] = C_2; + std::vector C_3 = {C(1, 1), C(1, 2), C(1, 3)}; + micro_write_data["cmat3"] = C_3; + std::vector C_4 = {C(1, 4), C(1, 5), C(2, 2)}; + micro_write_data["cmat4"] = C_4; + std::vector C_5 = {C(2, 3), C(2, 4), C(2, 5)}; + micro_write_data["cmat5"] = C_5; + std::vector C_6 = {C(3, 3), C(3, 4), C(3, 5)}; + micro_write_data["cmat6"] = C_6; + std::vector C_7 = {C(4, 4), C(4, 5), C(5, 5)}; + micro_write_data["cmat7"] = C_7; + } else { + std::vector stress13 = {homogenized_stress[0], homogenized_stress[1], homogenized_stress[2]}; + micro_write_data["stresses1to3"] = stress13; + std::vector stress46 = {homogenized_stress[3], homogenized_stress[4], homogenized_stress[5]}; + micro_write_data["stresses4to6"] = stress46; + std::vector stress79 = {homogenized_stress[6], homogenized_stress[7], homogenized_stress[8]}; + micro_write_data["stresses7to9"] = stress79; + } return micro_write_data; } +py::dict MicroSimulation::get_state() +{ + py::dict state; + + Log::io->debug() << "Serializing reader\n"; + Serializable::buffer_t reader_state = reader.serialize_full(); + state["reader_state"] = reader_state.as_int_vector(); + Log::io->debug() << "Serializing solver\n"; + Serializable::buffer_t solver_state = std::visit([](auto &s) { return s->serialize_full(); }, solver); + state["solver_state"] = solver_state.as_int_vector(); + Log::io->debug() << "Serializing material manager\n"; + Serializable::buffer_t matmngr_state = std::visit([](auto &m) { return m->serialize_full(); }, matmanager); + 
state["matmanager_state"] = matmngr_state.as_int_vector(); + Log::io->debug() << "Serializing micro sim done\n"; + + return state; +} + +void MicroSimulation::set_state(py::dict &state) +{ + auto py_reader_state = state["reader_state"].cast>(); + std::vector reader_state_i(py_reader_state.data(), py_reader_state.data() + py_reader_state.size()); + Serializable::buffer_t &reader_state = Serializable::buffer_t::from_int_vector(reader_state_i); + + auto py_solver_state = state["solver_state"].cast>(); + std::vector solver_state_i(py_solver_state.data(), py_solver_state.data() + py_solver_state.size()); + Serializable::buffer_t &solver_state = Serializable::buffer_t::from_int_vector(solver_state_i); + + auto py_matmngr_state = state["matmanager_state"].cast>(); + std::vector matmngr_state_i(py_matmngr_state.data(), py_matmngr_state.data() + py_matmngr_state.size()); + Serializable::buffer_t &matmngr_state = Serializable::buffer_t::from_int_vector(matmngr_state_i); + + reader.deserialize_full(reader_state); + if (reader.strain_type == "small") { + auto *mat_ptr = createMaterialManager<3, 6>(reader); + mat_ptr->deserialize_full(matmngr_state); + matmanager = mat_ptr; + auto *sol_ptr = createSolver<3, 6>(reader, nullptr); + sol_ptr->init_fundamentalSolutionBuffer(); + sol_ptr->matmanager = std::get *>(matmanager); + sol_ptr->deserialize_full(solver_state); + solver = sol_ptr; + + } else { + auto *mat_ptr = createMaterialManager<3, 9>(reader); + mat_ptr->deserialize_full(matmngr_state); + matmanager = mat_ptr; + auto *sol_ptr = createSolver<3, 9>(reader, nullptr); + sol_ptr->init_fundamentalSolutionBuffer(); + sol_ptr->matmanager = std::get *>(matmanager); + sol_ptr->deserialize_full(solver_state); + solver = sol_ptr; + } +} + +int MicroSimulation::get_id() { return _sim_id; } + PYBIND11_MODULE(PyFANS, m) { // optional docstring @@ -91,5 +176,22 @@ PYBIND11_MODULE(PyFANS, m) py::class_(m, "MicroSimulation") .def(py::init()) - .def("solve", &MicroSimulation::solve); + .def("solve", &MicroSimulation::solve) + .def(py::pickle( + [](MicroSimulation& m) { + return py::make_tuple(m.get_id(), m.get_state()); + }, + [](py::tuple t) { + int id = t[0].cast(); + py::dict state = t[1].cast(); + + MicroSimulation* m = new MicroSimulation(id, true); + m->set_state(state); + return m; + } + )) + .def("set_state", &MicroSimulation::set_state) + .def("get_state", &MicroSimulation::get_state) + .def("get_global_id", &MicroSimulation::get_id) + ; } diff --git a/pyfans/micro.hpp b/pyfans/micro.hpp index b6ebc6c..db83a04 100644 --- a/pyfans/micro.hpp +++ b/pyfans/micro.hpp @@ -5,6 +5,7 @@ #pragma once #include #include +#include #include "pybind11/pybind11.h" #include "pybind11/numpy.h" // numpy arrays @@ -14,19 +15,28 @@ #include "matmodel.h" #include "MaterialManager.h" #include "solver.h" +#include "serialization.h" namespace py = pybind11; class MicroSimulation { public: - MicroSimulation(int sim_id, char *input_file = "input.json"); + MicroSimulation(int sim_id, bool late_init = false, char *input_file = "input.json"); + ~MicroSimulation(); py::dict solve(py::dict macro_write_data, double dt); + py::dict get_state(); + void set_state(py::dict &state); + + int get_id(); + private: int _sim_id; Reader reader; // Hardcoding mechanical models because these definitions need information from the input file. 
- MaterialManager<3, 6> *matmanager; - Solver<3, 6> *solver; - double pert_param = 1e-6; // scalar strain perturbation parameter + using matmanager_t = std::variant *, MaterialManager<3, 9> *>; + using solver_t = std::variant *, Solver<3, 9> *>; + matmanager_t matmanager; + solver_t solver; + double pert_param = 1e-6; // scalar strain perturbation parameter }; diff --git a/src/logging.cpp b/src/logging.cpp new file mode 100755 index 0000000..cc36aa6 --- /dev/null +++ b/src/logging.cpp @@ -0,0 +1,174 @@ +#include "logging.h" + +#include + +#include "mpi.h" + +int active_rank = 0; + +Log::MPI_TraceSync::MPI_TraceSync(Logger &log, bool append) : _log(log), _append(append) { } +Log::MPI_TraceSync::~MPI_TraceSync() +{ + if (active_level >= All) { + MPI_Barrier(_log._comm); + const int size = _log._comm_size; + for (int i = 0; i < size; i++) { + Log::setActiveRank(i); + MPI_Barrier(_log._comm); + _log.trace_impl(_append) << _buffer.str() << std::flush; + } + Log::setActiveRank(0); + MPI_Barrier(_log._comm); + } +} + +Log::MPI_TraceSync &Log::MPI_TraceSync::operator<<(std::ostream &(*m)(std::ostream &) ) +{ + _buffer << m; + return *this; +} + +Log::Logger::Logger(std::string prefix, int comm_rank, int comm_size, const MPI_Comm& comm) : _prefix(std::move(prefix)), _nullstr(), _comm_rank(comm_rank), _comm_size(comm_size), _comm(comm) { + _start_time = std::chrono::steady_clock::now(); + _nullstr.setstate(std::ios_base::badbit); +} + +std::ostream &Log::Logger::error(bool append) { + if (active_level >= Error && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} + +std::ostream &Log::Logger::info(bool append) { + if (active_level >= Info && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} + +std::ostream &Log::Logger::warn(bool append) { + if (active_level >= Warn && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} + +std::ostream &Log::Logger::debug(bool append) { + if (active_level >= Debug && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return _nullstr; +} + +Log::MPI_TraceSync Log::Logger::trace(bool append) { + return Log::MPI_TraceSync(*this, append); +} + +std::ostream &Log::Logger::trace_impl(bool append) { + if (active_level >= All && _comm_rank == active_rank) { + if (append) return std::cout; + std::cout << _prefix << "[" << active_rank << "] "; + auto now = std::chrono::steady_clock::now(); + auto elapse_time = static_cast(std::chrono::duration_cast(now - _start_time).count()) / 1000.0; + std::cout << elapse_time << ": "; + return std::cout; + } else return 
_nullstr; +} + +void Log::Logger::progress(const std::string &prefix, int step, int max) { + if (_comm_rank != active_rank) return; + + if (step == 0) std::cout << _prefix << prefix; + + int digits = 1; + int div = 10; + while ((max / div) > 0) { + digits++; + div *= 10; + } + if (step > 0) for (int i = 0; i < digits; i++) std::cout << '\b'; + + int step_digits = 1; + div = 10; + while ((step / div) > 0) { + step_digits++; + div *= 10; + } + + for (int i = 0; i < digits - step_digits; i++) std::cout << ' '; + std::cout << step; + + if (step >= max-1) { + for (int i = 0; i < digits; i++) + std::cout << '\b'; + for (int i = 0; i < prefix.size(); i++) + std::cout << '\b'; + for (int i = 0; i < _prefix.size(); i++) + std::cout << '\b'; + } + //if (step >= max) std::cout << '\n'; + std::cout.flush(); +} + +void Log::init(int comm_rank, int comm_size, const MPI_Comm& comm) { + Level level = Log::Error; + if constexpr (VERBOSITY <= 0) level = Error; + if constexpr (VERBOSITY == 1) level = Info; + if constexpr (VERBOSITY == 2) level = Warn; + if constexpr (VERBOSITY == 3) level = Debug; + if constexpr (VERBOSITY >= 4) level = All; + + active_level = level; + general = std::make_unique("[FANS-GENERAL] ", comm_rank, comm_size, comm); + solver = std::make_unique("[FANS-SOLVER] ", comm_rank, comm_size, comm); + io = std::make_unique("[FANS-IO] ", comm_rank, comm_size, comm); +} + +void Log::finalize() { + general = nullptr; + solver = nullptr; + io = nullptr; +} + +Log::Level Log::active_level = Info; +std::unique_ptr Log::general = nullptr; +std::unique_ptr Log::solver = nullptr; +std::unique_ptr Log::io = nullptr; + +const std::string Log::level_to_string(Log::Level level) { + switch (level) { + case Error: + return "Error"; + case Info: + return "Info"; + case Warn: + return "Warn"; + case Debug: + return "Debug"; + case All: + return "All"; + } + return ""; +} + +void Log::setActiveRank(int rank) { + active_rank = rank; +} + diff --git a/src/main.cpp b/src/main.cpp index 6c69bf6..bcdce0b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,6 +4,7 @@ #include "solver.h" // Version +#include "logging.h" #include "version.h" template @@ -15,17 +16,13 @@ void runSolver(Reader &reader) MaterialManager *matmanager = createMaterialManager(reader); Solver *solver = createSolver(reader, matmanager); - if (reader.world_rank == 0) { - printf("\n╔════════════════════════════════════════════════════════════ Load case %zu/%zu: %zu time steps ════════════════════════════════════════════════════════════╗\n", - load_path_idx + 1, reader.load_cases.size(), reader.load_cases[load_path_idx].n_steps); - } + Log::general->info() << "\n╔════════════════════════════════════════════════════════════ Load case " + << load_path_idx + 1 << "/" << reader.load_cases.size() << ": " << reader.load_cases[load_path_idx].n_steps + << " time steps ════════════════════════════════════════════════════════════╗\n"; for (size_t time_step_idx = 0; time_step_idx < reader.load_cases[load_path_idx].n_steps; ++time_step_idx) { - if (reader.world_rank == 0) { - printf("║ ▶ Time step %zu/%zu (load case %zu/%zu) ◀ \n", - time_step_idx + 1, reader.load_cases[load_path_idx].n_steps, - load_path_idx + 1, reader.load_cases.size()); - } + Log::general->info() << "║ ▶ Time step " << time_step_idx + 1 << "/" << reader.load_cases[load_path_idx].n_steps + << " (load case " << load_path_idx + 1 << "/" << reader.load_cases.size() << ") ◀ \n"; if (reader.load_cases[load_path_idx].mixed) { solver->enableMixedBC(reader.load_cases[load_path_idx].mbc, 
time_step_idx); } else { @@ -37,9 +34,7 @@ void runSolver(Reader &reader) } delete solver; delete matmanager; - if (reader.world_rank == 0) { - printf("╚══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n"); - } + Log::general->info() << "╚══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n"; } } @@ -73,6 +68,7 @@ int main(int argc, char *argv[]) } reader.CloseResultsFile(); + Log::finalize(); MPI_Finalize(); return 0; } diff --git a/src/reader.cpp b/src/reader.cpp index 288d4af..b1fe343 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -1,5 +1,6 @@ #include "general.h" #include "reader.h" +#include "logging.h" #include "H5Cpp.h" #include "fftw3-mpi.h" @@ -29,16 +30,14 @@ void Reader::ComputeVolumeFractions() // Find the global maximum and minimum material indices unsigned short global_max, global_min; - MPI_Allreduce(&local_max, &global_max, 1, MPI_UNSIGNED_SHORT, MPI_MAX, MPI_COMM_WORLD); - MPI_Allreduce(&local_min, &global_min, 1, MPI_UNSIGNED_SHORT, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&local_max, &global_max, 1, MPI_UNSIGNED_SHORT, MPI_MAX, communicator); + MPI_Allreduce(&local_min, &global_min, 1, MPI_UNSIGNED_SHORT, MPI_MIN, communicator); // Calculate total number of materials n_mat = global_max - global_min + 1; - if (world_rank == 0) { - printf("# Number of materials: %i (from %u to %u)\n", n_mat, global_min, global_max); - printf("# Volume fractions\n"); - } + Log::io->info() << "# Number of materials: " << n_mat << " (from " << global_min << " to " << global_max << ")\n"; + Log::io->info() << "# Volume fractions\n"; // Using dynamic allocation for arrays since we don't know size at compile time std::vector vol_frac(n_mat, 0); @@ -52,26 +51,45 @@ void Reader::ComputeVolumeFractions() for (int i = 0; i < n_mat; i++) { long vf; - MPI_Allreduce(&(vol_frac[i]), &vf, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&(vol_frac[i]), &vf, 1, MPI_LONG, MPI_SUM, communicator); v_frac[i] = double(vf) / double(dims[0] * dims[1] * dims[2]); - if (world_rank == 0) - printf("# material %4u vol. frac. %10.4f%% \n", - static_cast(i) + global_min, 100. * v_frac[i]); + Log::io->info() << "# material " << static_cast(i) + global_min << " vol. frac. " << std::setw(10) << std::setprecision(4) << 100. 
diff --git a/src/reader.cpp b/src/reader.cpp
index 288d4af..b1fe343 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -1,5 +1,6 @@
 #include "general.h"
 #include "reader.h"
+#include "logging.h"
 #include "H5Cpp.h"
 #include "fftw3-mpi.h"
@@ -29,16 +30,14 @@ void Reader::ComputeVolumeFractions()
     // Find the global maximum and minimum material indices
     unsigned short global_max, global_min;
-    MPI_Allreduce(&local_max, &global_max, 1, MPI_UNSIGNED_SHORT, MPI_MAX, MPI_COMM_WORLD);
-    MPI_Allreduce(&local_min, &global_min, 1, MPI_UNSIGNED_SHORT, MPI_MIN, MPI_COMM_WORLD);
+    MPI_Allreduce(&local_max, &global_max, 1, MPI_UNSIGNED_SHORT, MPI_MAX, communicator);
+    MPI_Allreduce(&local_min, &global_min, 1, MPI_UNSIGNED_SHORT, MPI_MIN, communicator);
     // Calculate total number of materials
     n_mat = global_max - global_min + 1;
-    if (world_rank == 0) {
-        printf("# Number of materials: %i (from %u to %u)\n", n_mat, global_min, global_max);
-        printf("# Volume fractions\n");
-    }
+    Log::io->info() << "# Number of materials: " << n_mat << " (from " << global_min << " to " << global_max << ")\n";
+    Log::io->info() << "# Volume fractions\n";
     // Using dynamic allocation for arrays since we don't know size at compile time
     std::vector<long> vol_frac(n_mat, 0);
@@ -52,26 +51,45 @@
     for (int i = 0; i < n_mat; i++) {
         long vf;
-        MPI_Allreduce(&(vol_frac[i]), &vf, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
+        MPI_Allreduce(&(vol_frac[i]), &vf, 1, MPI_LONG, MPI_SUM, communicator);
         v_frac[i] = double(vf) / double(dims[0] * dims[1] * dims[2]);
-        if (world_rank == 0)
-            printf("# material %4u vol. frac. %10.4f%% \n",
-                   static_cast<unsigned short>(i) + global_min, 100. * v_frac[i]);
+        Log::io->info() << "# material " << static_cast<unsigned short>(i) + global_min << " vol. frac. " << std::setw(10) << std::setprecision(4) << 100. * v_frac[i] << std::defaultfloat << "%\n";
     }
 }
-void Reader ::ReadInputFile(char input_fn[])
+void Reader::ReadInputFile(char input_fn[])
 {
     try {
-
-        MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
-        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
-
         ifstream i(input_fn);
         json     j;
         i >> j;
+        ReadJson(j);
+
+    } catch (const std::exception &e) {
+        Log::io->error() << "ERROR trying to read input file '" << input_fn << "' for FANS\n";
+        Log::finalize();
+        exit(10);
+    }
+}
+
+void Reader::ReadJson(json j)
+{
+    try {
         inputJson = j; // Store complete input JSON for MaterialManager
+        if (j.contains("no_mpi")) {
+            force_single_rank = true;
+            communicator      = MPI_COMM_SELF;
+        } else {
+            force_single_rank = false;
+            communicator      = MPI_COMM_WORLD;
+        }
+
+        MPI_Comm_rank(communicator, &world_rank);
+        MPI_Comm_size(communicator, &world_size);
+        Log::init(world_rank, world_size, communicator);
+        Log::io->trace() << "Running with total ranks=" << world_size << ", rank=" << world_rank << "\n";
+
         microstructure = j["microstructure"];
         strcpy(ms_filename, microstructure["filepath"].get<std::string>().c_str());
         strcpy(ms_datasetname, microstructure["datasetname"].get<std::string>().c_str());
@@ -148,22 +166,20 @@ void Reader ::ReadInputFile(char input_fn[])
             load_cases.push_back(std::move(lc));
         }
-        if (world_rank == 0) {
-            printf("# microstructure file name: \t '%s'\n", ms_filename);
-            printf("# microstructure dataset name: \t '%s'\n", ms_datasetname);
-            printf("# strain type: \t %s\n", strain_type.c_str());
-            printf("# problem type: \t %s\n", problemType.c_str());
-            printf("# FE type: \t %s\n", FE_type.c_str());
-            printf(
-                "# FANS error measure: \t %s %s error \n",
-                errorParameters["type"].get<std::string>().c_str(),
-                errorParameters["measure"].get<std::string>().c_str());
-            printf("# FANS Tolerance: \t %10.5e\n", errorParameters["tolerance"].get<double>());
-            printf("# Max iterations: \t %6i\n", n_it);
-        }
+        Log::io->info() << "# microstructure file name: \t " << ms_filename << "\n";
+        Log::io->info() << "# microstructure dataset name: \t " << ms_datasetname << "\n";
+        Log::io->info() << "# strain type: \t " << strain_type << "\n";
+        Log::io->info() << "# problem type: \t " << problemType << "\n";
+        Log::io->info() << "# FE type: \t " << FE_type << "\n";
+        Log::io->info() << "# FANS error measure: \t "
+                        << errorParameters["type"].get<std::string>() << " "
+                        << errorParameters["measure"].get<std::string>() << " error \n";
+        Log::io->info() << "# FANS Tolerance: \t " << std::setw(10) << std::scientific << std::setprecision(5) << errorParameters["tolerance"].get<double>() << std::defaultfloat << "\n";
+        Log::io->info() << "# Max iterations: \t " << n_it << "\n";
     } catch (const std::exception &e) {
-        fprintf(stderr, "ERROR trying to read input file '%s' for FANS\n", input_fn);
+        Log::io->error() << "ERROR trying to read input json for FANS\n";
+        Log::finalize();
         exit(10);
     }
 }
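The new ReadJson entry point lets an embedding application hand FANS an already-parsed configuration instead of a file path; a top-level "no_mpi" key (only its presence is checked, the value is ignored) switches the reader onto MPI_COMM_SELF and sets force_single_rank, so each process runs an independent solve. A minimal sketch of such an embedding, assuming Reader is default-constructible as it is used from main.cpp (hypothetical driver, not part of the patch):

    #include "reader.h"
    #include <string>

    // Hypothetical embedding driver: build the config in memory and bypass the file-based path.
    void run_embedded(const std::string &config_text)
    {
        json j = json::parse(config_text);
        j["no_mpi"] = true;  // key presence alone selects MPI_COMM_SELF / single-rank mode
        Reader reader;       // assumption: default-constructible, as in the CLI driver
        reader.ReadJson(j);  // stores inputJson, picks the communicator, and calls Log::init
    }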
@@ -222,8 +238,8 @@ void Reader ::ReadMS(int hm)
     hid_t  plist_id; /* property list identifier */
     herr_t status;
-    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(communicator, &world_rank);
+    MPI_Comm_size(communicator, &world_size);
     MPI_Info info = MPI_INFO_NULL;
     // Set up file access property list with parallel I/O access
@@ -257,13 +273,8 @@ void Reader ::ReadMS(int hm)
         H5Aclose(attr_id);
         H5Tclose(attr_type);
     }
-    if (world_rank == 0) {
-        if (is_zyx) {
-            printf("# Using Z-Y-X dimension ordering for the microstructure data\n");
-        } else {
-            printf("# Using X-Y-Z dimension ordering for the microstructure data\n");
-        }
-    }
+    if (is_zyx) Log::io->info() << "# Using Z-Y-X dimension ordering for the microstructure data\n";
+    else Log::io->info() << "# Using X-Y-Z dimension ordering for the microstructure data\n";
     dims.resize(3);
     if (is_zyx) { /* file layout Z Y X -> logical X Y Z */
@@ -281,18 +292,20 @@
     l_e[1] = L[1] / double(dims[1]);
     l_e[2] = L[2] / double(dims[2]);
-    if (world_rank == 0) {
-        printf("# grid size set to [%i x %i x %i] --> %i voxels \nMicrostructure length: [%3.6f x %3.6f x %3.6f]\n", dims[0], dims[1], dims[2], dims[0] * dims[1] * dims[2], L[0], L[1], L[2]);
-        if (dims[0] % 2 != 0)
-            fprintf(stderr, "[ FANS3D_Grid ] WARNING: n_x is not a multiple of 2\n");
-        if (dims[1] % 2 != 0)
-            fprintf(stderr, "[ FANS3D_Grid ] WARNING: n_y is not a multiple of 2\n");
-        if (dims[2] % 2 != 0)
-            fprintf(stderr, "[ FANS3D_Grid ] WARNING: n_z is not a multiple of 2\n");
-        if (dims[0] / 4 < world_size)
-            throw std::runtime_error("[ FANS3D_Grid ] ERROR: Please decrease the number of processes or increase the grid size to ensure that each process has at least 4 boxels in the x direction.");
-        printf("Voxel length: [%1.8f, %1.8f, %1.8f]\n", l_e[0], l_e[1], l_e[2]);
-    }
+    Log::io->info() << "# grid size set to [" << dims[0] << " x " << dims[1] << " x " << dims[2] << "] --> " << dims[0] * dims[1] * dims[2] << " voxels \n";
+    Log::io->info() << "Microstructure length: ["
+                    << std::setw(3) << std::setprecision(6) << L[0] << std::defaultfloat << " x "
+                    << std::setw(3) << std::setprecision(6) << L[1] << std::defaultfloat << " x "
+                    << std::setw(3) << std::setprecision(6) << L[2] << std::defaultfloat << "]\n";
+    if (dims[0] % 2 != 0) Log::io->warn() << "[ FANS3D_Grid ] WARNING: n_x is not a multiple of 2\n";
+    if (dims[1] % 2 != 0) Log::io->warn() << "[ FANS3D_Grid ] WARNING: n_y is not a multiple of 2\n";
+    if (dims[2] % 2 != 0) Log::io->warn() << "[ FANS3D_Grid ] WARNING: n_z is not a multiple of 2\n";
+    if (dims[0] / 4 < world_size)
+        throw std::runtime_error("[ FANS3D_Grid ] ERROR: Please decrease the number of processes or increase the grid size to ensure that each process has at least 4 boxels in the x direction.");
+    Log::io->info() << "Voxel length: ["
+                    << std::setw(1) << std::setprecision(8) << l_e[0] << std::defaultfloat << ", "
+                    << std::setw(1) << std::setprecision(8) << l_e[1] << std::defaultfloat << ", "
+                    << std::setw(1) << std::setprecision(8) << l_e[2] << std::defaultfloat << "]\n";
     const ptrdiff_t n[3] = {dims[0], dims[1], dims[2] / 2 + 1};
     ptrdiff_t block0 = FFTW_MPI_DEFAULT_BLOCK;
@@ -310,11 +323,11 @@
         ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \
     */
-    alloc_local = fftw_mpi_local_size_many_transposed(rank, n, hm, block0, block1, MPI_COMM_WORLD, &local_n0, &local_0_start, &local_n1, &local_1_start);
+    alloc_local = fftw_mpi_local_size_many_transposed(rank, n, hm, block0, block1, communicator, &local_n0, &local_0_start, &local_n1, &local_1_start);
     if (local_n0 < 4)
         throw std::runtime_error("[ FANS3D_Grid ] ERROR: Number of voxels in x-direction is less than 4 in process " + to_string(world_rank));
-    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(communicator);
     hsize_t fcount[3], foffset[3];
     if (is_zyx) { /* file layout Z Y X */
@@ -381,6 +394,7 @@ void Reader ::ReadMS(int hm)
         FANS_free(tmp);
     } else {
         /* XYZ case: the slab is already in correct order */
+        FANS_free(ms); // dealloc mem
         ms = tmp;      // steal the buffer; no copy
     }
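The hunk that follows adds serialize_override / deserialize_override for the Serializable interface: the JSON configuration is written as a length-prefixed block (a header_len-sized size field followed by the dumped string), the fixed-size grid metadata is packed field by field, and finally the raw ms voxel slab is copied. Every field follows the same write-at-offset / advance-offset pattern; the generic sketch below illustrates that pattern with hypothetical helpers that are not in the patch (the patch spells the casts out by hand, and buffer_t / header_len come from the Serializable base):

    #include <cstddef>
    #include <cstring>

    // Copy one trivially copyable value into the buffer at 'offset' and return the
    // advanced offset. std::memcpy side-steps the alignment concerns that a raw
    // *reinterpret_cast<T *>(ptr) = value store can have on strict platforms.
    template <typename T>
    inline std::size_t put_at(unsigned char *base, std::size_t offset, const T &value)
    {
        std::memcpy(base + offset, &value, sizeof(T));
        return offset + sizeof(T);
    }

    template <typename T>
    inline std::size_t get_at(const unsigned char *base, std::size_t offset, T &value)
    {
        std::memcpy(&value, base + offset, sizeof(T));
        return offset + sizeof(T);
    }

On deserialization the same offsets are replayed in order, which is why the block sizes and the field order in the two override functions must stay in lockstep.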
@@ -396,11 +410,85 @@ void Reader ::ReadMS(int hm)
     this->ComputeVolumeFractions();
 }
+Reader::length_t Reader::serialize_override(buffer_t &buffer, length_t offset)
+{
+    // write JSON
+    std::string j_str      = inputJson.dump();
+    length_t    block_size = (j_str.size() + 1) * sizeof(char);
+    buffer.add_size(block_size + header_len);
+    *reinterpret_cast<length_t *>(std::data(buffer) + offset) = block_size;
+    offset += header_len;
+    std::memcpy(std::data(buffer) + offset, j_str.c_str(), block_size);
+    offset += block_size;
+
+    // write ms
+    block_size = sizeof(bool) +
+                 3 * sizeof(int) +
+                 3 * sizeof(double) +
+                 5 * sizeof(ptrdiff_t) +
+                 sizeof(int);
+    buffer.add_size(block_size);
+    *reinterpret_cast<bool *>(std::data(buffer) + offset) = is_zyx; offset += sizeof(bool);
+    std::memcpy(std::data(buffer) + offset, std::data(dims), sizeof(int) * 3); offset += sizeof(int) * 3;
+    std::memcpy(std::data(buffer) + offset, std::data(l_e), sizeof(double) * 3); offset += sizeof(double) * 3;
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = alloc_local;   offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = local_n0;      offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = local_0_start; offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = local_n1;      offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset) = local_1_start; offset += sizeof(ptrdiff_t);
+    *reinterpret_cast<int *>(std::data(buffer) + offset)       = n_mat;         offset += sizeof(int);
+
+    block_size = local_n0 * dims[1] * dims[2] * sizeof(unsigned short);
+    buffer.add_size(block_size);
+    std::memcpy(std::data(buffer) + offset, ms, block_size);
+    offset += block_size;
+
+    return offset;
+}
+
+Reader::length_t Reader::deserialize_override(buffer_t &buffer, length_t offset)
+{
+    // read JSON
+    length_t block_size = *reinterpret_cast<length_t *>(std::data(buffer) + offset);
+    offset += header_len;
+    std::string tmp("");
+    tmp.resize(block_size / sizeof(char), '\0');
+    std::memcpy(std::data(tmp), std::data(buffer) + offset, block_size);
+    offset += block_size;
+
+    std::istringstream iss(tmp);
+    json               j;
+    iss >> j;
+    ReadJson(j);
+
+    // read ms
+    is_zyx = *reinterpret_cast<bool *>(std::data(buffer) + offset); offset += sizeof(bool);
+    dims.resize(3);
+    std::memcpy(std::data(dims), std::data(buffer) + offset, sizeof(int) * 3); offset += sizeof(int) * 3;
+    l_e.resize(3);
+    std::memcpy(std::data(l_e), std::data(buffer) + offset, sizeof(double) * 3); offset += sizeof(double) * 3;
+    alloc_local   = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset); offset += sizeof(ptrdiff_t);
+    local_n0      = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset); offset += sizeof(ptrdiff_t);
+    local_0_start = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset); offset += sizeof(ptrdiff_t);
+    local_n1      = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset); offset += sizeof(ptrdiff_t);
+    local_1_start = *reinterpret_cast<ptrdiff_t *>(std::data(buffer) + offset); offset += sizeof(ptrdiff_t);
+    n_mat         = *reinterpret_cast<int *>(std::data(buffer) + offset);       offset += sizeof(int);
+
+    block_size = local_n0 * dims[1] * dims[2] * sizeof(unsigned short);
+    ms         = FANS_malloc<unsigned short>(static_cast<size_t>(local_n0) *
+                                             static_cast<size_t>(dims[1]) *
+                                             static_cast<size_t>(dims[2]));
+    std::memcpy(ms, std::data(buffer) + offset, block_size);
+    offset += block_size;
+
+    return offset;
+}
+
 void Reader::OpenResultsFile(const char *output_fn)
 {
     std::snprintf(results_filename, sizeof(results_filename), "%s", output_fn);
     hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    H5Pset_fapl_mpio(plist_id, MPI_COMM_WORLD, MPI_INFO_NULL);
+    H5Pset_fapl_mpio(plist_id, communicator, MPI_INFO_NULL);
     results_file_id = H5Fcreate(results_filename, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
     H5Pclose(plist_id);