lattice
diff --git a/‎include/clover_field.h‎
Lines changed: 53 additions & 11 deletions b/‎include/clover_field.h‎
Lines changed: 53 additions & 11 deletions
diff --git a/‎include/color_spinor_field.h‎
Lines changed: 5 additions & 21 deletions b/‎include/color_spinor_field.h‎
Lines changed: 5 additions & 21 deletions
diff --git a/‎include/color_spinor_field_order.h‎
Lines changed: 4 additions & 4 deletions b/‎include/color_spinor_field_order.h‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎include/comm_quda.h‎
Lines changed: 16 additions & 4 deletions b/‎include/comm_quda.h‎
Lines changed: 16 additions & 4 deletions
diff --git a/‎include/dirac_quda.h‎
Lines changed: 0 additions & 1 deletion b/‎include/dirac_quda.h‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎include/dslash_quda.h‎
Lines changed: 0 additions & 1 deletion b/‎include/dslash_quda.h‎
Lines changed: 0 additions & 1 deletion
@@ -13,10 +13,10 @@ namespace quda {
     void *norm;
     void *cloverInv;
     void *invNorm;
-
-//for twisted mass only:
+    double csw;  //!< Clover coefficient
     bool twisted; // whether to create twisted mass clover
     double mu2;
+    double rho;
 
     QudaCloverFieldOrder order;
     QudaFieldCreate create;
@@ -28,13 +28,13 @@ namespace quda {
 
     CloverFieldParam() :  LatticeFieldParam(),
       direct(true), inverse(true), clover(nullptr), norm(nullptr),
-      cloverInv(nullptr), invNorm(nullptr), twisted(false), mu2(0.0) { }
+      cloverInv(nullptr), invNorm(nullptr), csw(0.0), twisted(false), mu2(0.0), rho(0.0) { } // csw zeroed so it is never read uninitialised
 
     CloverFieldParam(const CloverFieldParam &param) :  LatticeFieldParam(param),
       direct(param.direct), inverse(param.inverse),
       clover(param.clover), norm(param.norm),
       cloverInv(param.cloverInv), invNorm(param.invNorm),
-      twisted(param.twisted), mu2(param.mu2) { }
+      csw(param.csw), twisted(param.twisted), mu2(param.mu2), rho(param.rho) { } // csw must be copied too, otherwise copies carry garbage
 
     CloverFieldParam(const CloverField &field);
   };
@@ -56,13 +56,15 @@ namespace quda {
     void *cloverInv;
     void *invNorm;
 
+    double csw;
     bool twisted; 
     double mu2;
+    double rho;
 
     QudaCloverFieldOrder order;
     QudaFieldCreate create;
 
-    double *trlog;
+    mutable double trlog[2];
 
   public:
     CloverField(const CloverFieldParam &param);
@@ -74,19 +76,57 @@ namespace quda {
     const void* Norm(bool inverse=false) const { return inverse ? invNorm : norm; }
 
     /**
-       This function returns true if the field is stored in an
-       internal field order for the given precision.
+       @return True if the field is stored in an internal field order
+       for the given precision.
     */
     bool isNative() const;
 
+    /**
+       @return Pointer to array storing trlog on each parity
+    */
     double* TrLog() const { return trlog; }
 
+    /**
+       @return The order of the field
+     */
     QudaCloverFieldOrder Order() const { return order; }
+
+    /**
+       @return The size of the field allocation
+     */
     size_t Bytes() const { return bytes; }
+
+    /**
+       @return The size of the norm allocation
+     */
     size_t NormBytes() const { return norm_bytes; }
-//new!
-    bool Twisted() const {return twisted; }
-    double Mu2() const {return mu2; }
+
+    /**
+       @return Clover coefficient (usually includes kappa)
+    */
+    double Csw() const { return csw; }
+
+    /**
+       @return If the clover field is associated with twisted-clover fermions
+    */
+    bool Twisted() const { return twisted; }
+
+    /**
+       @return mu^2 factor baked into inverse clover field (for twisted-clover inverse)
+    */
+    double Mu2() const { return mu2; }
+
+    /**
+       @return rho factor baked into the clover field (for real
+       diagonal additive Hasenbusch), e.g., A + rho
+    */
+    double Rho() const { return rho; }
+
+    /**
+       @brief Bakes the rho factor into the clover field (for real
+       diagonal additive Hasenbusch), e.g., A + rho
+    */
+    void setRho(double rho);
   };
 
   class cudaCloverField : public CloverField {
@@ -201,6 +241,7 @@ namespace quda {
     size_t bytes; // sizeof each clover field (per parity)
     size_t norm_bytes; // sizeof each norm field (per parity)
     int stride; // stride (volume + pad)
+    double rho; // rho additive factor
 
 #ifdef USE_TEXTURE_OBJECTS
     const cudaTextureObject_t &evenTex;
@@ -214,7 +255,8 @@ namespace quda {
 #endif
 
     FullClover(const cudaCloverField &clover, bool inverse=false) :
-    precision(clover.precision), bytes(clover.bytes), norm_bytes(clover.norm_bytes), stride(clover.stride)
+    precision(clover.precision), bytes(clover.bytes), norm_bytes(clover.norm_bytes),
+      stride(clover.stride), rho(clover.rho)
 #ifdef USE_TEXTURE_OBJECTS
 	, evenTex(inverse ? clover.evenInvTex : clover.evenTex)
 	, evenNormTex(inverse ? clover.evenInvNormTex : clover.evenNormTex)
 
@@ -277,18 +277,11 @@ namespace quda {
     void* ghostNorm[2][QUDA_MAX_DIM]; // pointers to ghost norms - NULL by default
 
     mutable int ghostFace[QUDA_MAX_DIM];// the size of each face
-    mutable int ghostOffset[QUDA_MAX_DIM][2]; // offsets to each ghost zone
-    mutable int ghostNormOffset[QUDA_MAX_DIM][2]; // offsets to each ghost zone for norm field
-
-    mutable size_t ghost_length; // length of ghost zone
-    mutable size_t ghost_norm_length; // length of ghost zone for norm
 
     mutable void *ghost_buf[2*QUDA_MAX_DIM]; // wrapper that points to current ghost zone
 
     size_t bytes; // size in bytes of spinor field
     size_t norm_bytes; // size in bytes of norm field
-    mutable size_t ghost_bytes; // size in bytes of the ghost field
-    mutable size_t ghost_face_bytes[QUDA_MAX_DIM];
 
     QudaSiteSubset siteSubset;
     QudaSiteOrder siteOrder;
@@ -304,6 +297,11 @@ namespace quda {
     //
     CompositeColorSpinorField components;
 
+    /**
+       Compute the required extended ghost zone sizes and offsets
+       @param[in] nFace The depth of the halo
+       @param[in] spin_project Whether we are spin projecting
+    */
     void createGhostZone(int nFace, bool spin_project=true) const;
 
     // resets the above attributes based on contents of param
@@ -403,7 +401,6 @@ namespace quda {
     QudaFieldOrder FieldOrder() const { return fieldOrder; }
     QudaGammaBasis GammaBasis() const { return gammaBasis; }
 
-    size_t GhostLength() const { return ghost_length; }
     const int *GhostFace() const { return ghostFace; }
     int GhostOffset(const int i) const { return ghostOffset[i][0]; }
     int GhostOffset(const int i, const int j) const { return ghostOffset[i][j]; }
@@ -486,9 +483,6 @@ namespace quda {
 
     bool reference; // whether the field is a reference or not
 
-    static size_t ghostFaceBytes;
-    static bool initGhostFaceBuffer;
-
     mutable void *ghost_field_tex[4]; // instance pointer to GPU halo buffer (used to check if static allocation has changed)
 
     void create(const QudaFieldCreate);
@@ -531,23 +525,13 @@ namespace quda {
     */
     void createComms(int nFace, bool spin_project=true);
 
-    /**
-       @brief Destroy the communication handlers and buffers
-    */
-    void destroyComms();
-
     /**
        @brief Allocate the ghost buffers
        @param[in] nFace Depth of each halo
        @param[in] spin_project Whether the halos are spin projected (Wilson-type fermions only)
     */
     void allocateGhostBuffer(int nFace, bool spin_project=true) const;
 
-    /**
-       @brief Free statically allocated ghost buffers
-    */
-    static void freeGhostBuffer(void);
-
     /**
        @brief Packs the cudaColorSpinorField's ghost zone
        @param[in] nFace How many faces to pack (depth)
 
@@ -1079,21 +1079,21 @@ namespace quda {
 	{ if (volumeCB != a.Stride()) errorQuda("Stride must equal volume for this field order"); }
 	virtual ~QDPJITDiracOrder() { ; }
 
-	__device__ __host__ inline void load(RegType v[Ns*Nc*2], int x, int parity=1) const {
+	__device__ __host__ inline void load(RegType v[Ns*Nc*2], int x, int parity=0) const {
 	  for (int s=0; s<Ns; s++) {
 	    for (int c=0; c<Nc; c++) {
 	      for (int z=0; z<2; z++) {
-		v[(s*Nc+c)*2+z] = field[(((z*Nc + c)*Ns + s)*2 + parity)*volumeCB + x];
+		v[(s*Nc+c)*2+z] = field[(((z*Nc + c)*Ns + s)*2 + (1-parity))*volumeCB + x];
 	      }
 	    }
 	  }
 	}
 
-	__device__ __host__ inline void save(const RegType v[Ns*Nc*2], int x, int parity=1) {
+	__device__ __host__ inline void save(const RegType v[Ns*Nc*2], int x, int parity=0) {
 	  for (int s=0; s<Ns; s++) {
 	    for (int c=0; c<Nc; c++) {
 	      for (int z=0; z<2; z++) {
-		field[(((z*Nc + c)*Ns + s)*2 + parity)*volumeCB + x] = v[(s*Nc+c)*2+z];
+		field[(((z*Nc + c)*Ns + s)*2 + (1-parity))*volumeCB + x] = v[(s*Nc+c)*2+z];
 	      }
 	    }
 	  }
 
@@ -1,5 +1,5 @@
-#ifndef _COMM_QUDA_H
-#define _COMM_QUDA_H
+#pragma once
+#include <cstdint>
 
 #ifdef __cplusplus
 extern "C" {
@@ -212,12 +212,24 @@ extern "C" {
   void comm_allreduce_max(double* data);
   void comm_allreduce_array(double* data, size_t size);
   void comm_allreduce_int(int* data);
+  void comm_allreduce_xor(uint64_t *data);
   void comm_broadcast(void *data, size_t nbytes);
   void comm_barrier(void);
   void comm_abort(int status);
 
+  void reduceMaxDouble(double &);
+  void reduceDouble(double &);
+  void reduceDoubleArray(double *, const int len);
+  int commDim(int);
+  int commCoords(int);
+  int commDimPartitioned(int dir);
+  void commDimPartitionedSet(int dir);
+  bool commGlobalReduction();
+  void commGlobalReductionSet(bool global_reduce);
+
+  bool commAsyncReduction();
+  void commAsyncReductionSet(bool global_reduce);
+
 #ifdef __cplusplus
 }
 #endif
-
-#endif /* _COMM_QUDA_H */
 
@@ -6,7 +6,6 @@
 #include <gauge_field.h>
 #include <clover_field.h>
 #include <dslash_quda.h>
-#include <face_quda.h>
 #include <blas_quda.h>
 
 #include <typeinfo>
 
@@ -3,7 +3,6 @@
 
 #include <quda_internal.h>
 #include <tune_quda.h>
-#include <face_quda.h>
 #include <gauge_field.h>
 
 #include <worker.h>
Original file line number	Diff line number	Diff line change
`@@ -1079,21 +1079,21 @@ namespace quda {`
`1079`	`1079`	`{ if (volumeCB != a.Stride()) errorQuda("Stride must equal volume for this field order"); }`
`1080`	`1080`	`virtual ~QDPJITDiracOrder() { ; }`
`1081`	`1081`
`1082`		`- __device__ __host__ inline void load(RegType v[Ns*Nc*2], int x, int parity=1) const {`
	`1082`	`+ __device__ __host__ inline void load(RegType v[Ns*Nc*2], int x, int parity=0) const {`
`1083`	`1083`	`for (int s=0; s<Ns; s++) {`
`1084`	`1084`	`for (int c=0; c<Nc; c++) {`
`1085`	`1085`	`for (int z=0; z<2; z++) {`
`1086`		`- v[(s*Nc+c)*2+z] = field[(((z*Nc + c)*Ns + s)*2 + parity)*volumeCB + x];`
	`1086`	`+ v[(s*Nc+c)*2+z] = field[(((z*Nc + c)*Ns + s)*2 + (1-parity))*volumeCB + x];`
`1087`	`1087`	`}`
`1088`	`1088`	`}`
`1089`	`1089`	`}`
`1090`	`1090`	`}`
`1091`	`1091`
`1092`		`- __device__ __host__ inline void save(const RegType v[Ns*Nc*2], int x, int parity=1) {`
	`1092`	`+ __device__ __host__ inline void save(const RegType v[Ns*Nc*2], int x, int parity=0) {`
`1093`	`1093`	`for (int s=0; s<Ns; s++) {`
`1094`	`1094`	`for (int c=0; c<Nc; c++) {`
`1095`	`1095`	`for (int z=0; z<2; z++) {`
`1096`		`- field[(((z*Nc + c)*Ns + s)*2 + parity)*volumeCB + x] = v[(s*Nc+c)*2+z];`
	`1096`	`+ field[(((z*Nc + c)*Ns + s)*2 + (1-parity))*volumeCB + x] = v[(s*Nc+c)*2+z];`
`1097`	`1097`	`}`
`1098`	`1098`	`}`
`1099`	`1099`	`}`