Skip to content

Commit 3f85e0a

Browse files
Merge pull request #646 from lattice/feature/gauge-comms-cleanup
Feature/gauge comms cleanup
2 parents 5b8e90c + 658089a commit 3f85e0a

File tree

111 files changed

+2096
-3140
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+2096
-3140
lines changed

include/clover_field.h

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ namespace quda {
1313
void *norm;
1414
void *cloverInv;
1515
void *invNorm;
16-
17-
//for twisted mass only:
16+
double csw; //! Clover coefficient
1817
bool twisted; // whether to create twisted mass clover
1918
double mu2;
19+
double rho;
2020

2121
QudaCloverFieldOrder order;
2222
QudaFieldCreate create;
@@ -28,13 +28,13 @@ namespace quda {
2828

2929
CloverFieldParam() : LatticeFieldParam(),
3030
direct(true), inverse(true), clover(nullptr), norm(nullptr),
31-
cloverInv(nullptr), invNorm(nullptr), twisted(false), mu2(0.0) { }
31+
cloverInv(nullptr), invNorm(nullptr), twisted(false), mu2(0.0), rho(0.0) { }
3232

3333
CloverFieldParam(const CloverFieldParam &param) : LatticeFieldParam(param),
3434
direct(param.direct), inverse(param.inverse),
3535
clover(param.clover), norm(param.norm),
3636
cloverInv(param.cloverInv), invNorm(param.invNorm),
37-
twisted(param.twisted), mu2(param.mu2) { }
37+
twisted(param.twisted), mu2(param.mu2), rho(param.rho) { }
3838

3939
CloverFieldParam(const CloverField &field);
4040
};
@@ -56,13 +56,15 @@ namespace quda {
5656
void *cloverInv;
5757
void *invNorm;
5858

59+
double csw;
5960
bool twisted;
6061
double mu2;
62+
double rho;
6163

6264
QudaCloverFieldOrder order;
6365
QudaFieldCreate create;
6466

65-
double *trlog;
67+
mutable double trlog[2];
6668

6769
public:
6870
CloverField(const CloverFieldParam &param);
@@ -74,19 +76,57 @@ namespace quda {
7476
const void* Norm(bool inverse=false) const { return inverse ? invNorm : norm; }
7577

7678
/**
77-
This function returns true if the field is stored in an
78-
internal field order for the given precision.
79+
@return True if the field is stored in an internal field order
80+
for the given precision.
7981
*/
8082
bool isNative() const;
8183

84+
/**
85+
@return Pointer to array storing trlog on each parity
86+
*/
8287
double* TrLog() const { return trlog; }
8388

89+
/**
90+
@return The order of the field
91+
*/
8492
QudaCloverFieldOrder Order() const { return order; }
93+
94+
/**
95+
@return The size of the field allocation
96+
*/
8597
size_t Bytes() const { return bytes; }
98+
99+
/**
100+
@return The size of the norm allocation
101+
*/
86102
size_t NormBytes() const { return norm_bytes; }
87-
//new!
88-
bool Twisted() const {return twisted; }
89-
double Mu2() const {return mu2; }
103+
104+
/**
105+
@return Clover coefficient (usually includes kappa)
106+
*/
107+
bool Csw() const { return csw; }
108+
109+
/**
110+
@return If the clover field is associated with twisted-clover fermions
111+
*/
112+
bool Twisted() const { return twisted; }
113+
114+
/**
115+
@return mu^2 factor baked into inverse clover field (for twisted-clover inverse)
116+
*/
117+
double Mu2() const { return mu2; }
118+
119+
/**
120+
@return rho factor baked into the clover field, (for real
121+
diagonal additive Hasenbusch), e.g., A + rho
122+
*/
123+
double Rho() const { return rho; }
124+
125+
/**
126+
@brief Bakes in the rho factor into the clover field, (for real
127+
diagonal additive Hasenbusch), e.g., A + rho
128+
*/
129+
void setRho(double rho);
90130
};
91131

92132
class cudaCloverField : public CloverField {
@@ -201,6 +241,7 @@ namespace quda {
201241
size_t bytes; // sizeof each clover field (per parity)
202242
size_t norm_bytes; // sizeof each norm field (per parity)
203243
int stride; // stride (volume + pad)
244+
double rho; // rho additive factor
204245

205246
#ifdef USE_TEXTURE_OBJECTS
206247
const cudaTextureObject_t &evenTex;
@@ -214,7 +255,8 @@ namespace quda {
214255
#endif
215256

216257
FullClover(const cudaCloverField &clover, bool inverse=false) :
217-
precision(clover.precision), bytes(clover.bytes), norm_bytes(clover.norm_bytes), stride(clover.stride)
258+
precision(clover.precision), bytes(clover.bytes), norm_bytes(clover.norm_bytes),
259+
stride(clover.stride), rho(clover.rho)
218260
#ifdef USE_TEXTURE_OBJECTS
219261
, evenTex(inverse ? clover.evenInvTex : clover.evenTex)
220262
, evenNormTex(inverse ? clover.evenInvNormTex : clover.evenNormTex)

include/color_spinor_field.h

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -277,18 +277,11 @@ namespace quda {
277277
void* ghostNorm[2][QUDA_MAX_DIM]; // pointers to ghost norms - NULL by default
278278

279279
mutable int ghostFace[QUDA_MAX_DIM];// the size of each face
280-
mutable int ghostOffset[QUDA_MAX_DIM][2]; // offsets to each ghost zone
281-
mutable int ghostNormOffset[QUDA_MAX_DIM][2]; // offsets to each ghost zone for norm field
282-
283-
mutable size_t ghost_length; // length of ghost zone
284-
mutable size_t ghost_norm_length; // length of ghost zone for norm
285280

286281
mutable void *ghost_buf[2*QUDA_MAX_DIM]; // wrapper that points to current ghost zone
287282

288283
size_t bytes; // size in bytes of spinor field
289284
size_t norm_bytes; // size in bytes of norm field
290-
mutable size_t ghost_bytes; // size in bytes of the ghost field
291-
mutable size_t ghost_face_bytes[QUDA_MAX_DIM];
292285

293286
QudaSiteSubset siteSubset;
294287
QudaSiteOrder siteOrder;
@@ -304,6 +297,11 @@ namespace quda {
304297
//
305298
CompositeColorSpinorField components;
306299

300+
/**
301+
Compute the required extended ghost zone sizes and offsets
302+
@param[in] nFace The depth of the halo
303+
@param[in] spin_project Whether we are spin projecting
304+
*/
307305
void createGhostZone(int nFace, bool spin_project=true) const;
308306

309307
// resets the above attributes based on contents of param
@@ -403,7 +401,6 @@ namespace quda {
403401
QudaFieldOrder FieldOrder() const { return fieldOrder; }
404402
QudaGammaBasis GammaBasis() const { return gammaBasis; }
405403

406-
size_t GhostLength() const { return ghost_length; }
407404
const int *GhostFace() const { return ghostFace; }
408405
int GhostOffset(const int i) const { return ghostOffset[i][0]; }
409406
int GhostOffset(const int i, const int j) const { return ghostOffset[i][j]; }
@@ -486,9 +483,6 @@ namespace quda {
486483

487484
bool reference; // whether the field is a reference or not
488485

489-
static size_t ghostFaceBytes;
490-
static bool initGhostFaceBuffer;
491-
492486
mutable void *ghost_field_tex[4]; // instance pointer to GPU halo buffer (used to check if static allocation has changed)
493487

494488
void create(const QudaFieldCreate);
@@ -531,23 +525,13 @@ namespace quda {
531525
*/
532526
void createComms(int nFace, bool spin_project=true);
533527

534-
/**
535-
@brief Destroy the communication handlers and buffers
536-
*/
537-
void destroyComms();
538-
539528
/**
540529
@brief Allocate the ghost buffers
541530
@param[in] nFace Depth of each halo
542531
@param[in] spin_project Whether the halos are spin projected (Wilson-type fermions only)
543532
*/
544533
void allocateGhostBuffer(int nFace, bool spin_project=true) const;
545534

546-
/**
547-
@brief Free statically allocated ghost buffers
548-
*/
549-
static void freeGhostBuffer(void);
550-
551535
/**
552536
@brief Packs the cudaColorSpinorField's ghost zone
553537
@param[in] nFace How many faces to pack (depth)

include/color_spinor_field_order.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,21 +1079,21 @@ namespace quda {
10791079
{ if (volumeCB != a.Stride()) errorQuda("Stride must equal volume for this field order"); }
10801080
virtual ~QDPJITDiracOrder() { ; }
10811081

1082-
__device__ __host__ inline void load(RegType v[Ns*Nc*2], int x, int parity=1) const {
1082+
__device__ __host__ inline void load(RegType v[Ns*Nc*2], int x, int parity=0) const {
10831083
for (int s=0; s<Ns; s++) {
10841084
for (int c=0; c<Nc; c++) {
10851085
for (int z=0; z<2; z++) {
1086-
v[(s*Nc+c)*2+z] = field[(((z*Nc + c)*Ns + s)*2 + parity)*volumeCB + x];
1086+
v[(s*Nc+c)*2+z] = field[(((z*Nc + c)*Ns + s)*2 + (1-parity))*volumeCB + x];
10871087
}
10881088
}
10891089
}
10901090
}
10911091

1092-
__device__ __host__ inline void save(const RegType v[Ns*Nc*2], int x, int parity=1) {
1092+
__device__ __host__ inline void save(const RegType v[Ns*Nc*2], int x, int parity=0) {
10931093
for (int s=0; s<Ns; s++) {
10941094
for (int c=0; c<Nc; c++) {
10951095
for (int z=0; z<2; z++) {
1096-
field[(((z*Nc + c)*Ns + s)*2 + parity)*volumeCB + x] = v[(s*Nc+c)*2+z];
1096+
field[(((z*Nc + c)*Ns + s)*2 + (1-parity))*volumeCB + x] = v[(s*Nc+c)*2+z];
10971097
}
10981098
}
10991099
}

include/comm_quda.h

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef _COMM_QUDA_H
2-
#define _COMM_QUDA_H
1+
#pragma once
2+
#include <cstdint>
33

44
#ifdef __cplusplus
55
extern "C" {
@@ -212,12 +212,24 @@ extern "C" {
212212
void comm_allreduce_max(double* data);
213213
void comm_allreduce_array(double* data, size_t size);
214214
void comm_allreduce_int(int* data);
215+
void comm_allreduce_xor(uint64_t *data);
215216
void comm_broadcast(void *data, size_t nbytes);
216217
void comm_barrier(void);
217218
void comm_abort(int status);
218219

220+
void reduceMaxDouble(double &);
221+
void reduceDouble(double &);
222+
void reduceDoubleArray(double *, const int len);
223+
int commDim(int);
224+
int commCoords(int);
225+
int commDimPartitioned(int dir);
226+
void commDimPartitionedSet(int dir);
227+
bool commGlobalReduction();
228+
void commGlobalReductionSet(bool global_reduce);
229+
230+
bool commAsyncReduction();
231+
void commAsyncReductionSet(bool global_reduce);
232+
219233
#ifdef __cplusplus
220234
}
221235
#endif
222-
223-
#endif /* _COMM_QUDA_H */

include/dirac_quda.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include <gauge_field.h>
77
#include <clover_field.h>
88
#include <dslash_quda.h>
9-
#include <face_quda.h>
109
#include <blas_quda.h>
1110

1211
#include <typeinfo>

include/dslash_quda.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
#include <quda_internal.h>
55
#include <tune_quda.h>
6-
#include <face_quda.h>
76
#include <gauge_field.h>
87

98
#include <worker.h>

0 commit comments

Comments
 (0)