CFD-GO · lisataldir · Dec 19, 2025
diff --git a/models/flow/d3q27_cumulant/Dynamics.R b/models/flow/d3q27_cumulant/Dynamics.R
@@ -43,6 +43,10 @@ AddNodeType(name="NVelocity", group="BOUNDARY")
 AddNodeType(name="SVelocity", group="BOUNDARY")
 AddNodeType(name="NPressure", group="BOUNDARY")
 AddNodeType(name="SPressure", group="BOUNDARY")
+AddNodeType(name="FVelocity", group="BOUNDARY")
+AddNodeType(name="BVelocity", group="BOUNDARY")
+AddNodeType(name="FPressure", group="BOUNDARY")
+AddNodeType(name="BPressure", group="BOUNDARY")
 AddNodeType(name="NSymmetry", group="ADDITIONALS")
 AddNodeType(name="SSymmetry", group="ADDITIONALS")
 AddNodeType(name="Body", group="BODY")

diff --git a/models/flow/d3q27_cumulant/Dynamics.c.Rt b/models/flow/d3q27_cumulant/Dynamics.c.Rt
@@ -24,9 +24,11 @@ CudaDeviceFunction real_t getRho(){
 	return <?R C(sum(f)) ?>;
 }
 
-CudaDeviceFunction real_t getP(){
-	return ((<?R C(sum(f)) ?>)-1.0)/3.0;
+CudaDeviceFunction real_t getP() {
+    return ((<?R C(sum(f)) ?>) - 1.0) / 3.0;
 }
+
+
 CudaDeviceFunction vector_t getU_(){
         real_t d = getRho();
         vector_t u;
@@ -98,6 +100,7 @@ CudaDeviceFunction real_t getKinE(){
 }
 
 #endif
+
 CudaDeviceFunction float2 Color() {
         float2 ret;
         vector_t u = getU();
@@ -109,6 +112,7 @@ CudaDeviceFunction float2 Color() {
         }
         return ret;
 }
+
 CudaDeviceFunction void BounceBack()
 {
     vector_t p1,p2;
@@ -136,6 +140,16 @@ CudaDeviceFunction void WVelocity()
 <?R ZouHe(EQ, 1, 1, "velocity") ?>
 }
 
+CudaDeviceFunction void FVelocity()
+{
+<?R ZouHe(EQ, 3, -1, "velocity") ?>
+}
+
+CudaDeviceFunction void BVelocity()
+{
+<?R ZouHe(EQ, 3, 1, "velocity") ?>
+}
+
 CudaDeviceFunction void SVelocity()
 {
 <?R ZouHe(EQ, 2, 1, "velocity") ?>
@@ -151,6 +165,16 @@ CudaDeviceFunction void WPressure()
 <?R ZouHe(EQ, 1, 1, "pressure") ?>
 }
 
+CudaDeviceFunction void FPressure()
+{
+<?R ZouHe(EQ, 3, -1, "pressure") ?>
+}
+
+CudaDeviceFunction void BPressure()
+{
+<?R ZouHe(EQ, 3, 1, "pressure") ?>
+}
+
 CudaDeviceFunction void SPressure()
 {
 <?R ZouHe(EQ, 2, 1, "pressure") ?>
@@ -242,26 +266,38 @@ CudaDeviceFunction void RunBoundaries() {
 		WVelocityTurbulent();
 		break;
 	case NODE_EPressure:
-        EPressure();
-        break;
+		EPressure();
+		break;
 	case NODE_WPressure:
 		WPressure();
 		break;
+	case NODE_FPressure:
+		FPressure();
+		break;
+	case NODE_BPressure:
+		BPressure();
+		break;
 	case NODE_SPressure:
-        SPressure();
-        break;
+		SPressure();
+		break;
 	case NODE_NPressure:
-        NPressure();
-        break;
+		NPressure();
+		break;
 	case NODE_WVelocity:
 		WVelocity();
 		break;
-	 case NODE_NVelocity:
-        NVelocity();
-        break;
-	 case NODE_SVelocity:
-        SVelocity();
-        break;
+	case NODE_FVelocity:
+		FVelocity();
+		break;
+	case NODE_BVelocity:
+		BVelocity();
+		break;
+	case NODE_NVelocity:
+		NVelocity();
+		break;
+	case NODE_SVelocity:
+		SVelocity();
+		break;
 	case NODE_EVelocity:
 		EVelocity();
 		break;
@@ -518,3 +554,4 @@ varUYUZ = varUYUZ + getU_().y*getU_().z;
 #endif
 }
 
+
diff --git a/src/ArbLattice.cpp b/src/ArbLattice.cpp
@@ -23,6 +23,7 @@ ArbLattice::ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map<
 
 void ArbLattice::initialize(size_t num_snaps_, const std::map<std::string, int>& setting_zones, pugi::xml_node arb_node) {
     const int rank = mpitools::MPI_Rank(comm);
+    sample = std::make_unique<Sampler>(model.get(), units, rank);
     sizes.snaps = num_snaps_;
 #ifdef ADJOINT
     sizes.snaps += 2;  // Adjoint snaps are appended to the total snap allocation
@@ -549,7 +550,28 @@ std::vector<real_t> ArbLattice::getCoord(const Model::Coord& d, real_t scale) {
     return ret;
 }
 
-#include <iostream>
+int ArbLattice::getId(const double &x, const double &y, const double &z) {
+    int id = 0;
+    double epsilon = std::fabs(connect.coord(0, 0)-x) + std::fabs(connect.coord(1, 0)-y) + std::fabs(connect.coord(2, 0)-z);
+    for (int i = 1; i < getLocalSize(); i++) {
+        double tmp = std::fabs(connect.coord(0, i)-x) + std::fabs(connect.coord(1, i)-y) + std::fabs(connect.coord(2, i)-z);
+        if (tmp < epsilon) {
+            epsilon = tmp;
+            id = i;
+        }
+    }
+    return id;
+}
+
+void ArbLattice::getSample(int quant, lbRegion r, real_t scale, real_t *buf) {
+    setSnapIn(Snap);
+#ifdef ADJOINT
+    setAdjSnapIn(aSnap);
+#endif
+    int lid = getId(r.x, r.y, r.z);
+    launcher.SampleQuantity(quant, lid, buf, scale, data);
+}
+
 
 void ArbLattice::initCommManager() {
     if (mpitools::MPI_Size(comm) == 1) return;
@@ -843,4 +865,20 @@ void ArbLattice::resetAverage(){
             CudaMemset(&getSnapPtr(Snap)[f.id*sizes.snaps_pitch], 0, sizes.snaps_pitch*sizeof(real_t));
         }
     }
-}
+}
+
+void ArbLattice::updateAllSamples(){
+    const int rank = mpitools::MPI_Rank(comm);
+    if (sample->size != 0) {
+	    for (size_t j = 0; j < sample->spoints.size(); j++) {
+		    if (rank == sample->spoints[j].rank) {
+		        for(const Model::Quantity& q : model->quantities) {
+                    if (sample->quant->in(q.name.c_str())){
+                        double v = sample->units->alt(q.unit.c_str());
+                        getSample(q.id, sample->spoints[j].location, 1/v, &sample->gpu_buffer[sample->location[q.name.c_str()]+(data.iter - sample->startIter)*sample->size + sample->totalIter*j*sample->size]); 
+                    }
+                }
+	        } 
+        }
+    }
+}
diff --git a/src/ArbLattice.hpp b/src/ArbLattice.hpp
@@ -13,6 +13,7 @@
 #include "Region.h"
 #include "pugixml.hpp"
 #include "utils.h"
+#include "Sampler.h"
 
 /// Note on node numbering: We have 2 indexing schemes - a global and a local one.
 /// Globally, nodes are numbered such that consecutive ranks own monotonically increasing intervals, (e.g. rank 0 owns nodes [0, n_0), rank 1 owns nodes [n_0, n_1), etc.) This is required for ParMETIS, but it is also a convenient scheme in general.
@@ -71,6 +72,7 @@ class ArbLattice : public LatticeBase {
    public:
     static constexpr size_t Q = Model_m::Q;    /// Stencil size
     static constexpr size_t NF = Model_m::NF;  /// Number of fields
+    std::unique_ptr<Sampler> sample;           /// initializing sample with zero size
 
     ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map<std::string, int>& setting_zones, pugi::xml_node arb_node, MPI_Comm comm_);
     ArbLattice(const ArbLattice&) = delete;
@@ -90,6 +92,8 @@ class ArbLattice : public LatticeBase {
     virtual std::vector<real_t> getField(const Model::Field& f);
     virtual std::vector<real_t> getFieldAdj(const Model::Field& f);
     virtual std::vector<real_t> getCoord(const Model::Coord& q, real_t scale = 1);
+    virtual int getId(const double &x,const  double &y, const double &z);
+    virtual void getSample(int quant, lbRegion r, real_t scale, real_t *buf);
 
     virtual void setFlags(const std::vector<big_flag_t>& x);
     virtual void setField(const Model::Field& f, const std::vector<real_t>& x);
@@ -101,6 +105,7 @@ class ArbLattice : public LatticeBase {
     const std::vector<unsigned>& getLocalPermutation() const { return local_permutation; }
 
     void resetAverage();
+    void updateAllSamples();
 
    protected:
     ArbLatticeLauncher launcher;  /// Launcher responsible for running CUDA kernels on the lattice

diff --git a/src/ArbLatticeLauncher.h.Rt b/src/ArbLatticeLauncher.h.Rt
@@ -21,6 +21,7 @@ struct ArbLatticeLauncher {
     void unpack(CudaStream_t stream) const;
 
     void getQuantity(int quant, real_t* host_tab, real_t scale, const LatticeData& data) const;
+    void SampleQuantity(int quant, int lid, real_t *host_tab, real_t scale, const LatticeData& data);
 
 private:
 <?R for (q in rows(Quantities)) { ifdef(q$adjoint);
@@ -31,8 +32,12 @@ if (q$adjoint) {
 }
 ?>
     void getQuantity<?%s q$name ?>(<?%s q$type ?>* tab, real_t scale, const LatticeData& data) const;
+    void getSample<?%s q$name ?>(int lid, <?%s q$type ?>* tab, real_t scale, const LatticeData& data) const;
 <?R }
 ifdef() ?>
 };
 
 #endif  // ARBLATTICELAUNCHER_H
+
+
+
diff --git a/src/ArbLatticeLauncher.hpp.Rt b/src/ArbLatticeLauncher.hpp.Rt
@@ -87,6 +87,23 @@ ifdef();
 	}
 }
 
+void ArbLatticeLauncher::SampleQuantity(int quant, int lid, real_t* host_tab, real_t scale, const LatticeData &data) {
+    switch(quant) {	<?R
+for (q in rows(Quantities)) { ifdef(q$adjoint);
+?>
+        case <?%s q$Index ?>: {
+            const auto gpu_tab = cudaMakeUnique<<?%s q$type ?>>(1);
+            getSample<?%s q$name ?>(lid, gpu_tab.get(), scale, data);
+            CudaMemcpy(host_tab, gpu_tab.get(), sizeof(<?%s q$type ?>), CudaMemcpyDeviceToHost);
+            break;
+        } <?R
+}
+ifdef();
+?>
+	}
+}
+
+
 <?R for (q in rows(Quantities)) { ifdef(q$adjoint);
 if (q$adjoint) {
   node = "Node_Adj"
@@ -128,10 +145,47 @@ if (q$type == "vector_t") {
     }
 };
 
+struct GetSampleArbExecutor<?%s q$name?> : public LinearExecutor {
+    ArbLatticeContainer container;
+    LatticeData data;
+    int lid;
+    <?%s q$type ?>* buf;
+    real_t scale;
+
+    CudaDeviceFunction void Execute() const {
+        using LA = ArbLatticeAccess; <?R
+        if (q$adjoint) { ?>
+            using N = Node< LA, Adjoint, NoGlobals, Get >; <?R
+        } else { ?>
+            using N = Node< LA, Primal, NoGlobals, Get >; <?R
+        }?>
+        LA acc(lid, container);
+        N now(acc, data);
+        acc.pop(now); <?R
+        if (q$adjoint) { ?>
+            acc.pop_adj(now); <?R
+        } ?>
+        auto val = now.get<?%s q$name ?>(); <?R
+        if (q$type == "vector_t") {
+            for (coef in c("x","y","z")) { ?>
+                val.<?%s coef ?> *= scale; <?R
+            }
+        } else { ?>
+            val *= scale; <?R
+        } ?>
+        buf[0] = val;    
+    }
+};
+
 void ArbLatticeLauncher::getQuantity<?%s q$name ?>(<?%s q$type ?>* tab, real_t scale, const LatticeData& data) const {
     const GetQuantityArbExecutor<?%s q$name?> executor{{container.num_border_nodes + container.num_interior_nodes}, container, data, tab, scale};
     LaunchExecutor(executor);
 }
+
+void ArbLatticeLauncher::getSample<?%s q$name ?>(int lid, <?%s q$type ?>* tab, real_t scale, const LatticeData& data) const {
+    const GetSampleArbExecutor<?%s q$name?> executor{{1}, container, data, lid, tab, scale};
+    LaunchExecutor(executor);
+}
 <?R }
 ifdef() ?>
 

diff --git a/src/Handlers/cbFailcheck.cpp b/src/Handlers/cbFailcheck.cpp
@@ -1,4 +1,5 @@
 #include "cbFailcheck.h"
+#include <algorithm>
 
 std::string cbFailcheck::xmlname = "Failcheck";