lattice
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 2 additions & 11 deletions
diff --git a/configure b/configure
Lines changed: 3 additions & 28 deletions
diff --git a/configure.ac b/configure.ac
Lines changed: 3 additions & 21 deletions
diff --git a/include/clover_field.h b/include/clover_field.h
Lines changed: 9 additions & 1 deletion
@@ -67,8 +67,8 @@ if(NOT QUDA_GPU_ARCH)
   message ("-- Building QUDA for arch " "${QUDA_DEFAULT_GPU_ARCH}")
 endif()
 
-set(QUDA_GPU_ARCH ${QUDA_DEFAULT_GPU_ARCH} CACHE STRING "set the GPU architecture (sm_20, sm_21, sm_30, sm_35, sm_37, sm_50, sm_52, sm_60)")
-set_property(CACHE QUDA_GPU_ARCH PROPERTY STRINGS sm_20 sm_21 sm_30 sm_35 sm_37 sm_50 sm_52 sm_60)
+set(QUDA_GPU_ARCH ${QUDA_DEFAULT_GPU_ARCH} CACHE STRING "set the GPU architecture (sm_20, sm_21, sm_30, sm_35, sm_37, sm_50, sm_52, sm_60, sm_70)")
+set_property(CACHE QUDA_GPU_ARCH PROPERTY STRINGS sm_20 sm_21 sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_70)
 # build options
 set(QUDA_DIRAC_WILSON ON CACHE BOOL "build Wilson Dirac operators")
 set(QUDA_DIRAC_CLOVER ON CACHE BOOL "build clover Dirac operators")
@@ -137,9 +137,6 @@ set(QUDA_MAX_MULTI_BLAS_N  "4" CACHE STRING "maximum value to intizlize template
 set(QUDA_MPI_NVTX OFF CACHE BOOL "add nvtx markup to MPI API calls for the visual profiler")
 set(QUDA_INTERFACE_NVTX OFF CACHE BOOL "add nvtx markup to interface calls for the visual profiler")
 
-# GPUdirect options
-set(QUDA_GPU_DIRECT ON CACHE BOOL "set to 'yes' to allow GPU and NIC to shared pinned buffers")
-
 # features in development
 set(QUDA_SSTEP OFF CACHE BOOL "build s-step linear solvers")
 set(QUDA_MULTIGRID OFF CACHE BOOL "build multigrid solvers")
@@ -160,8 +157,6 @@ mark_as_advanced(QUDA_MAX_MULTI_BLAS_N)
 mark_as_advanced(QUDA_MPI_NVTX)
 mark_as_advanced(QUDA_INTERFACE_NVTX)
 
-mark_as_advanced(QUDA_GPU_DIRECT)
-
 mark_as_advanced(QUDA_SSTEP)
 mark_as_advanced(QUDA_USE_EIGEN)
 mark_as_advanced(QUDA_BLOCKSOVER)
@@ -457,10 +452,6 @@ if(NOT QUDA_FERMI_DBLE_TEX)
   add_definitions(-DFERMI_NO_DBLE_TEX)
 endif(NOT QUDA_FERMI_DBLE_TEX)
 
-if(GPU_DIRECT)
-  add_definitions(-DGPU_DIRECT)
-endif(GPU_DIRECT)
-
 if(QUDA_INTERFACE_QDP)
   add_definitions(-DBUILD_QDP_INTERFACE)
 endif(QUDA_INTERFACE_QDP)
 
@@ -609,7 +609,6 @@ BUILD_MILC_INTERFACE
 BUILD_QDP_INTERFACE
 INTERFACE_NVTX
 MPI_NVTX
-GPU_DIRECT
 POSIX_THREADS
 BUILD_MPI
 BUILD_QMP
@@ -730,7 +729,6 @@ enable_sstep
 enable_contract
 enable_dynamic_clover
 enable_multi_gpu
-enable_gpu_direct
 enable_mpi_nvtx
 enable_interface_nvtx
 with_mpi
@@ -1370,7 +1368,7 @@ Optional Features:
   --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
   --enable-cpu-arch=arch  Set CPU arch: x86 or x86_64 (default: x86_64)
   --enable-gpu-arch=arch  Set GPU arch: sm_20, sm_21, sm_30, sm_35, sm_50,
-                          sm_52, sm_60 (default: sm_35)
+                          sm_52, sm_60, sm_70 (default: sm_35)
   --enable-os=os          Set operating system: linux, osx (default: linux)
   --enable-host-debug     Enable debugging of host code
   --enable-device-debug   Enable debugging for device code
@@ -1418,8 +1416,6 @@ Optional Features:
   --enable-dynamic-clover Invert dynamically the clover term for
                           twisted-clover fermions (default: disabled)
   --enable-multi-gpu      Enable Multi-GPU support (default: disabled)
-  --enable-gpu-direct     Enable CUDA/NIC interop pinned memory (default:
-                          enabled)
   --enable-mpi-nvtx       Enable NVTX markup for profiling MPI API calls in
                           the visual profiler (default: disabled)
   --enable-interface-nvtx Enable NVTX markup for profiling interface calls in
@@ -2312,15 +2308,6 @@ else
 fi
 
 
-# Check whether --enable-gpu-direct was given.
-if test "${enable_gpu_direct+set}" = set; then :
-  enableval=$enable_gpu_direct;  gpu_direct=${enableval}
-else
-   gpu_direct="yes"
-
-fi
-
-
 # Check whether --enable-mpi-nvtx was given.
 if test "${enable_mpi_nvtx+set}" = set; then :
   enableval=$enable_mpi_nvtx;  mpi_nvtx=${enableval}
@@ -4044,9 +4031,9 @@ x86 | x86_64 ) ;;
 esac
 
 case ${gpu_arch} in
-sm_20 | sm_21 | sm_30 | sm_35 | sm_50 | sm_52 | sm_60) ;;
+sm_20 | sm_21 | sm_30 | sm_35 | sm_50 | sm_52 | sm_60 | sm_70) ;;
 *)
-  as_fn_error $? " GPU arch must be one of: sm_20, sm_21, sm_30, sm_35, sm_50, sm_52, sm_60 " "$LINENO" 5
+  as_fn_error $? " GPU arch must be one of: sm_20, sm_21, sm_30, sm_35, sm_50, sm_52, sm_60, sm_70 " "$LINENO" 5
   ;;
 esac
 
@@ -4253,13 +4240,6 @@ yes|no);;
   ;;
 esac
 
-case ${gpu_direct} in
-yes|no);;
-*)
-  as_fn_error $? " invalid value for --enable-gpu-direct " "$LINENO" 5
-  ;;
-esac
-
 case ${mpi_nvtx} in
 yes|no);;
 *)
@@ -4493,11 +4473,6 @@ $as_echo "$as_me: Setting POSIX_THREADS = ${posix_threads}" >&6;}
 POSIX_THREADS=${posix_threads}
 
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: Setting GPU_DIRECT= ${gpu_direct}" >&5
-$as_echo "$as_me: Setting GPU_DIRECT= ${gpu_direct}" >&6;}
-GPU_DIRECT=${gpu_direct}
-
-
 { $as_echo "$as_me:${as_lineno-$LINENO}: Setting MPI_NVTXS= ${mpi_nvtx}" >&5
 $as_echo "$as_me: Setting MPI_NVTXS= ${mpi_nvtx}" >&6;}
 MPI_NVTX=${mpi_nvtx}
 
@@ -18,7 +18,7 @@ AC_ARG_ENABLE(cpu-arch,
 
 dnl Specify GPU Arch
 AC_ARG_ENABLE(gpu-arch,
- AC_HELP_STRING([--enable-gpu-arch=arch], [ Set GPU arch: sm_20, sm_21, sm_30, sm_35, sm_50, sm_52, sm_60 (default: sm_35)]),
+ AC_HELP_STRING([--enable-gpu-arch=arch], [ Set GPU arch: sm_20, sm_21, sm_30, sm_35, sm_50, sm_52, sm_60, sm_70 (default: sm_35)]),
  [ gpu_arch=${enableval} ],
  [ gpu_arch="sm_35" ]
 )
@@ -207,13 +207,6 @@ AC_ARG_ENABLE(multi-gpu,
   [ multi_gpu="no" ]
 )
 
-dnl enable cuda / nic interop buffer
-AC_ARG_ENABLE(gpu-direct,
-  AC_HELP_STRING([--enable-gpu-direct], [ Enable CUDA/NIC interop pinned memory (default: enabled)]),
-  [ gpu_direct=${enableval}],
-  [ gpu_direct="yes" ]
-)
-
 dnl enable NVTX mark up for the MPI in the visual profiler
 AC_ARG_ENABLE(mpi-nvtx,
   AC_HELP_STRING([--enable-mpi-nvtx], [ Enable NVTX markup for profiling MPI API calls in the visual profiler (default: disabled)]),
@@ -351,9 +344,9 @@ esac
 
 dnl CPU Arch
 case ${gpu_arch} in
-sm_20 | sm_21 | sm_30 | sm_35 | sm_50 | sm_52 | sm_60) ;;
+sm_20 | sm_21 | sm_30 | sm_35 | sm_50 | sm_52 | sm_60 | sm_70) ;;
 *)
-  AC_MSG_ERROR([ GPU arch must be one of: sm_20, sm_21, sm_30, sm_35, sm_50, sm_52, sm_60 ])
+  AC_MSG_ERROR([ GPU arch must be one of: sm_20, sm_21, sm_30, sm_35, sm_50, sm_52, sm_60, sm_70 ])
   ;;
 esac
 
@@ -589,14 +582,6 @@ yes|no);;
   ;;
 esac
 
-dnl Enables CUDA/NIC buffer interop
-case ${gpu_direct} in
-yes|no);;
-*)
-  AC_MSG_ERROR([ invalid value for --enable-gpu-direct ])
-  ;;
-esac
-
 dnl enable NVTX mark up for the MPI in the visual profiler
 case ${mpi_nvtx} in
 yes|no);;
@@ -780,9 +765,6 @@ AC_SUBST( BUILD_MPI, [${build_mpi}])
 AC_MSG_NOTICE([Setting POSIX_THREADS = ${posix_threads}])
 AC_SUBST( POSIX_THREADS, [${posix_threads}])
 
-AC_MSG_NOTICE([Setting GPU_DIRECT= ${gpu_direct}])
-AC_SUBST( GPU_DIRECT, [${gpu_direct}])
-
 AC_MSG_NOTICE([Setting MPI_NVTXS= ${mpi_nvtx}])
 AC_SUBST( MPI_NVTX, [${mpi_nvtx}])
 
 
@@ -102,6 +102,10 @@ namespace quda {
     void compute(const cudaGaugeField &gauge);
 
 #ifdef USE_TEXTURE_OBJECTS
+    cudaTextureObject_t tex;
+    cudaTextureObject_t normTex;
+    cudaTextureObject_t invTex;
+    cudaTextureObject_t invNormTex;
     cudaTextureObject_t evenTex;
     cudaTextureObject_t evenNormTex;
     cudaTextureObject_t oddTex;
@@ -110,7 +114,7 @@ namespace quda {
     cudaTextureObject_t evenInvNormTex;
     cudaTextureObject_t oddInvTex;
     cudaTextureObject_t oddInvNormTex;
-    void createTexObject(cudaTextureObject_t &tex, cudaTextureObject_t &texNorm, void *field, void *norm);
+    void createTexObject(cudaTextureObject_t &tex, cudaTextureObject_t &texNorm, void *field, void *norm, bool full);
     void destroyTexObject();
 #endif
 
@@ -121,6 +125,10 @@ namespace quda {
     virtual ~cudaCloverField();
 
 #ifdef USE_TEXTURE_OBJECTS
+    const cudaTextureObject_t& Tex() const { return tex; }
+    const cudaTextureObject_t& NormTex() const { return normTex; }
+    const cudaTextureObject_t& InvTex() const { return invTex; }
+    const cudaTextureObject_t& InvNormTex() const { return invNormTex; }
     const cudaTextureObject_t& EvenTex() const { return evenTex; }
     const cudaTextureObject_t& EvenNormTex() const { return evenNormTex; }
     const cudaTextureObject_t& OddTex() const { return oddTex; }