Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions models/flow/d3q27_cumulant/Dynamics.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ AddNodeType(name="NVelocity", group="BOUNDARY")
AddNodeType(name="SVelocity", group="BOUNDARY")
AddNodeType(name="NPressure", group="BOUNDARY")
AddNodeType(name="SPressure", group="BOUNDARY")
AddNodeType(name="FVelocity", group="BOUNDARY")
AddNodeType(name="BVelocity", group="BOUNDARY")
AddNodeType(name="FPressure", group="BOUNDARY")
AddNodeType(name="BPressure", group="BOUNDARY")
AddNodeType(name="NSymmetry", group="ADDITIONALS")
AddNodeType(name="SSymmetry", group="ADDITIONALS")
AddNodeType(name="Body", group="BODY")
Expand Down
65 changes: 51 additions & 14 deletions models/flow/d3q27_cumulant/Dynamics.c.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ CudaDeviceFunction real_t getRho(){
return <?R C(sum(f)) ?>;
}

CudaDeviceFunction real_t getP(){
return ((<?R C(sum(f)) ?>)-1.0)/3.0;
CudaDeviceFunction real_t getP() {
return ((<?R C(sum(f)) ?>) - 1.0) / 3.0;
}


CudaDeviceFunction vector_t getU_(){
real_t d = getRho();
vector_t u;
Expand Down Expand Up @@ -98,6 +100,7 @@ CudaDeviceFunction real_t getKinE(){
}

#endif

CudaDeviceFunction float2 Color() {
float2 ret;
vector_t u = getU();
Expand All @@ -109,6 +112,7 @@ CudaDeviceFunction float2 Color() {
}
return ret;
}

CudaDeviceFunction void BounceBack()
{
vector_t p1,p2;
Expand Down Expand Up @@ -136,6 +140,16 @@ CudaDeviceFunction void WVelocity()
<?R ZouHe(EQ, 1, 1, "velocity") ?>
}

CudaDeviceFunction void FVelocity()
{
<?R ZouHe(EQ, 3, -1, "velocity") ?>
}

CudaDeviceFunction void BVelocity()
{
<?R ZouHe(EQ, 3, 1, "velocity") ?>
}

CudaDeviceFunction void SVelocity()
{
<?R ZouHe(EQ, 2, 1, "velocity") ?>
Expand All @@ -151,6 +165,16 @@ CudaDeviceFunction void WPressure()
<?R ZouHe(EQ, 1, 1, "pressure") ?>
}

CudaDeviceFunction void FPressure()
{
<?R ZouHe(EQ, 3, -1, "pressure") ?>
}

CudaDeviceFunction void BPressure()
{
<?R ZouHe(EQ, 3, 1, "pressure") ?>
}

CudaDeviceFunction void SPressure()
{
<?R ZouHe(EQ, 2, 1, "pressure") ?>
Expand Down Expand Up @@ -242,26 +266,38 @@ CudaDeviceFunction void RunBoundaries() {
WVelocityTurbulent();
break;
case NODE_EPressure:
EPressure();
break;
EPressure();
break;
case NODE_WPressure:
WPressure();
break;
case NODE_FPressure:
FPressure();
break;
case NODE_BPressure:
BPressure();
break;
case NODE_SPressure:
SPressure();
break;
SPressure();
break;
case NODE_NPressure:
NPressure();
break;
NPressure();
break;
case NODE_WVelocity:
WVelocity();
break;
case NODE_NVelocity:
NVelocity();
break;
case NODE_SVelocity:
SVelocity();
break;
case NODE_FVelocity:
FVelocity();
break;
case NODE_BVelocity:
BVelocity();
break;
case NODE_NVelocity:
NVelocity();
break;
case NODE_SVelocity:
SVelocity();
break;
case NODE_EVelocity:
EVelocity();
break;
Expand Down Expand Up @@ -518,3 +554,4 @@ varUYUZ = varUYUZ + getU_().y*getU_().z;
#endif
}


42 changes: 40 additions & 2 deletions src/ArbLattice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ ArbLattice::ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map<

void ArbLattice::initialize(size_t num_snaps_, const std::map<std::string, int>& setting_zones, pugi::xml_node arb_node) {
const int rank = mpitools::MPI_Rank(comm);
sample = std::make_unique<Sampler>(model.get(), units, rank);
sizes.snaps = num_snaps_;
#ifdef ADJOINT
sizes.snaps += 2; // Adjoint snaps are appended to the total snap allocation
Expand Down Expand Up @@ -549,7 +550,28 @@ std::vector<real_t> ArbLattice::getCoord(const Model::Coord& d, real_t scale) {
return ret;
}

#include <iostream>
int ArbLattice::getId(const double &x, const double &y, const double &z) {
int id = 0;
double epsilon = std::fabs(connect.coord(0, 0)-x) + std::fabs(connect.coord(1, 0)-y) + std::fabs(connect.coord(2, 0)-z);
for (int i = 1; i < getLocalSize(); i++) {
double tmp = std::fabs(connect.coord(0, i)-x) + std::fabs(connect.coord(1, i)-y) + std::fabs(connect.coord(2, i)-z);
if (tmp < epsilon) {
epsilon = tmp;
id = i;
}
}
return id;
}

void ArbLattice::getSample(int quant, lbRegion r, real_t scale, real_t *buf) {
setSnapIn(Snap);
#ifdef ADJOINT
setAdjSnapIn(aSnap);
#endif
int lid = getId(r.x, r.y, r.z);
launcher.SampleQuantity(quant, lid, buf, scale, data);
}


void ArbLattice::initCommManager() {
if (mpitools::MPI_Size(comm) == 1) return;
Expand Down Expand Up @@ -843,4 +865,20 @@ void ArbLattice::resetAverage(){
CudaMemset(&getSnapPtr(Snap)[f.id*sizes.snaps_pitch], 0, sizes.snaps_pitch*sizeof(real_t));
}
}
}
}

void ArbLattice::updateAllSamples(){
const int rank = mpitools::MPI_Rank(comm);
if (sample->size != 0) {
for (size_t j = 0; j < sample->spoints.size(); j++) {
if (rank == sample->spoints[j].rank) {
for(const Model::Quantity& q : model->quantities) {
if (sample->quant->in(q.name.c_str())){
double v = sample->units->alt(q.unit.c_str());
getSample(q.id, sample->spoints[j].location, 1/v, &sample->gpu_buffer[sample->location[q.name.c_str()]+(data.iter - sample->startIter)*sample->size + sample->totalIter*j*sample->size]);
}
}
}
}
}
}
5 changes: 5 additions & 0 deletions src/ArbLattice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "Region.h"
#include "pugixml.hpp"
#include "utils.h"
#include "Sampler.h"

/// Note on node numbering: We have 2 indexing schemes - a global and a local one.
/// Globally, nodes are numbered such that consecutive ranks own monotonically increasing intervals, (e.g. rank 0 owns nodes [0, n_0), rank 1 owns nodes [n_0, n_1), etc.) This is required for ParMETIS, but it is also a convenient scheme in general.
Expand Down Expand Up @@ -71,6 +72,7 @@ class ArbLattice : public LatticeBase {
public:
static constexpr size_t Q = Model_m::Q; /// Stencil size
static constexpr size_t NF = Model_m::NF; /// Number of fields
std::unique_ptr<Sampler> sample; /// initializing sample with zero size

ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map<std::string, int>& setting_zones, pugi::xml_node arb_node, MPI_Comm comm_);
ArbLattice(const ArbLattice&) = delete;
Expand All @@ -90,6 +92,8 @@ class ArbLattice : public LatticeBase {
virtual std::vector<real_t> getField(const Model::Field& f);
virtual std::vector<real_t> getFieldAdj(const Model::Field& f);
virtual std::vector<real_t> getCoord(const Model::Coord& q, real_t scale = 1);
virtual int getId(const double &x,const double &y, const double &z);
virtual void getSample(int quant, lbRegion r, real_t scale, real_t *buf);

virtual void setFlags(const std::vector<big_flag_t>& x);
virtual void setField(const Model::Field& f, const std::vector<real_t>& x);
Expand All @@ -101,6 +105,7 @@ class ArbLattice : public LatticeBase {
const std::vector<unsigned>& getLocalPermutation() const { return local_permutation; }

void resetAverage();
void updateAllSamples();

protected:
ArbLatticeLauncher launcher; /// Launcher responsible for running CUDA kernels on the lattice
Expand Down
5 changes: 5 additions & 0 deletions src/ArbLatticeLauncher.h.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ struct ArbLatticeLauncher {
void unpack(CudaStream_t stream) const;

void getQuantity(int quant, real_t* host_tab, real_t scale, const LatticeData& data) const;
void SampleQuantity(int quant, int lid, real_t *host_tab, real_t scale, const LatticeData& data);

private:
<?R for (q in rows(Quantities)) { ifdef(q$adjoint);
Expand All @@ -31,8 +32,12 @@ if (q$adjoint) {
}
?>
void getQuantity<?%s q$name ?>(<?%s q$type ?>* tab, real_t scale, const LatticeData& data) const;
void getSample<?%s q$name ?>(int lid, <?%s q$type ?>* tab, real_t scale, const LatticeData& data) const;
<?R }
ifdef() ?>
};

#endif // ARBLATTICELAUNCHER_H



54 changes: 54 additions & 0 deletions src/ArbLatticeLauncher.hpp.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,23 @@ ifdef();
}
}

void ArbLatticeLauncher::SampleQuantity(int quant, int lid, real_t* host_tab, real_t scale, const LatticeData &data) {
switch(quant) { <?R
for (q in rows(Quantities)) { ifdef(q$adjoint);
?>
case <?%s q$Index ?>: {
const auto gpu_tab = cudaMakeUnique<<?%s q$type ?>>(1);
getSample<?%s q$name ?>(lid, gpu_tab.get(), scale, data);
CudaMemcpy(host_tab, gpu_tab.get(), sizeof(<?%s q$type ?>), CudaMemcpyDeviceToHost);
break;
} <?R
}
ifdef();
?>
}
}


<?R for (q in rows(Quantities)) { ifdef(q$adjoint);
if (q$adjoint) {
node = "Node_Adj"
Expand Down Expand Up @@ -128,10 +145,47 @@ if (q$type == "vector_t") {
}
};

struct GetSampleArbExecutor<?%s q$name?> : public LinearExecutor {
ArbLatticeContainer container;
LatticeData data;
int lid;
<?%s q$type ?>* buf;
real_t scale;

CudaDeviceFunction void Execute() const {
using LA = ArbLatticeAccess; <?R
if (q$adjoint) { ?>
using N = Node< LA, Adjoint, NoGlobals, Get >; <?R
} else { ?>
using N = Node< LA, Primal, NoGlobals, Get >; <?R
}?>
LA acc(lid, container);
N now(acc, data);
acc.pop(now); <?R
if (q$adjoint) { ?>
acc.pop_adj(now); <?R
} ?>
auto val = now.get<?%s q$name ?>(); <?R
if (q$type == "vector_t") {
for (coef in c("x","y","z")) { ?>
val.<?%s coef ?> *= scale; <?R
}
} else { ?>
val *= scale; <?R
} ?>
buf[0] = val;
}
};

void ArbLatticeLauncher::getQuantity<?%s q$name ?>(<?%s q$type ?>* tab, real_t scale, const LatticeData& data) const {
const GetQuantityArbExecutor<?%s q$name?> executor{{container.num_border_nodes + container.num_interior_nodes}, container, data, tab, scale};
LaunchExecutor(executor);
}

void ArbLatticeLauncher::getSample<?%s q$name ?>(int lid, <?%s q$type ?>* tab, real_t scale, const LatticeData& data) const {
const GetSampleArbExecutor<?%s q$name?> executor{{1}, container, data, lid, tab, scale};
LaunchExecutor(executor);
}
<?R }
ifdef() ?>

Expand Down
1 change: 1 addition & 0 deletions src/Handlers/cbFailcheck.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "cbFailcheck.h"
#include <algorithm>

std::string cbFailcheck::xmlname = "Failcheck";

Expand Down
Loading
Loading