Remove half tensor type support

rusty1s · rusty1s · commit e419e153d17f · 2018-01-04T14:32:07.000+01:00
diff --git a/torch_scatter/kernel/THCAtomics.cuh b/torch_scatter/kernel/THCAtomics.cuh
@@ -109,9 +109,6 @@ static inline __device__ void atomicAdd( double *address,  double val) { AtomicA
 #elif !defined(__CUDA_ARCH__) && (CUDA_VERSION < 8000)
 static inline __device__ void atomicAdd( double *address,  double val) {}
 #endif
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicAdd(   half *address,    half val) {}
-#endif
 
 #define OP(X, Y) Y * X
 ATOMIC_(Mul)
@@ -123,9 +120,6 @@ static inline __device__ void atomicMul(int32_t *address, int32_t val) { AtomicM
 static inline __device__ void atomicMul(int64_t *address, int64_t val) { AtomicMulIntegerImpl<int64_t, sizeof(int64_t)>()(address, val); }
 static inline __device__ void atomicMul(  float *address,   float val) { AtomicMulDecimalImpl<  float, sizeof(  float)>()(address, val); }
 static inline __device__ void atomicMul( double *address,  double val) { AtomicMulDecimalImpl< double, sizeof( double)>()(address, val); }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicMul(   half *address,    half val) {}
-#endif
 
 #define OP(X, Y) Y / X
 ATOMIC_(Div)
@@ -137,9 +131,6 @@ static inline __device__ void atomicDiv(int32_t *address, int32_t val) { AtomicD
 static inline __device__ void atomicDiv(int64_t *address, int64_t val) { AtomicDivIntegerImpl<int64_t, sizeof(int64_t)>()(address, val); }
 static inline __device__ void atomicDiv(  float *address,   float val) { AtomicDivDecimalImpl<  float, sizeof(  float)>()(address, val); }
 static inline __device__ void atomicDiv( double *address,  double val) { AtomicDivDecimalImpl< double, sizeof( double)>()(address, val); }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicDiv(   half *address,    half val) {}
-#endif
 
 #define OP(X, Y) max(Y, X)
 ATOMIC_(Max)
@@ -150,9 +141,6 @@ static inline __device__ void atomicMax(int16_t *address, int16_t val) { AtomicM
 static inline __device__ void atomicMax(int64_t *address, int64_t val) { AtomicMaxIntegerImpl<int64_t, sizeof(int64_t)>()(address, val); }
 static inline __device__ void atomicMax(  float *address,   float val) { AtomicMaxDecimalImpl<  float, sizeof(  float)>()(address, val); }
 static inline __device__ void atomicMax( double *address,  double val) { AtomicMaxDecimalImpl< double, sizeof( double)>()(address, val); }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicMax(   half *address,    half val) {}
-#endif
 
 #define OP(X, Y) min(Y, X)
 ATOMIC_(Min)
@@ -163,6 +151,3 @@ static inline __device__ void atomicMin(int16_t *address, int16_t val) { AtomicM
 static inline __device__ void atomicMin(int64_t *address, int64_t val) { AtomicMinIntegerImpl<int64_t, sizeof(int64_t)>()(address, val); }
 static inline __device__ void atomicMin(  float *address,   float val) { AtomicMinDecimalImpl<  float, sizeof(  float)>()(address, val); }
 static inline __device__ void atomicMin( double *address,  double val) { AtomicMinDecimalImpl< double, sizeof( double)>()(address, val); }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicMin(   half *address,    half val) {}
-#endif
diff --git a/torch_scatter/kernel/common.cuh b/torch_scatter/kernel/common.cuh
@@ -35,14 +35,3 @@ struct TensorInfo {
   } \
   THCudaCheck(cudaGetLastError()); \
 }
-
-static inline __device__ bool eq(uint8_t a, uint8_t b) { return a == b; }
-static inline __device__ bool eq( int8_t a,  int8_t b) { return a == b; }
-static inline __device__ bool eq(int16_t a, int16_t b) { return a == b; }
-static inline __device__ bool eq(int32_t a, int32_t b) { return a == b; }
-static inline __device__ bool eq(int64_t a, int64_t b) { return a == b; }
-static inline __device__ bool eq(  float a,   float b) { return a == b; }
-static inline __device__ bool eq( double a,  double b) { return a == b; }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ bool eq(half a, half b) { return __half2float(a) == __half2float(b); }
-#endif
diff --git a/torch_scatter/kernel/kernel.cu b/torch_scatter/kernel/kernel.cu
@@ -64,7 +64,7 @@ __global__ void argKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, Te
   KERNEL_LOOP(i, n) {
     int outputOffset = 0; int indexOffset = 0; int inputOffset = 0; int argOffset = 0;
     IndexToScatterOffsets4<Real, Real, int64_t, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset, arg, &argOffset);
-    if (eq(input.data[inputOffset], output.data[outputOffset])) arg.data[argOffset] = inputOffset % input.size[dim];
+    if (input.data[inputOffset] == output.data[outputOffset]) arg.data[argOffset] = inputOffset % input.size[dim];
   }
 }
 
@@ -78,4 +78,16 @@ __global__ void indexBackwardKernel(TensorInfo<Real> output, TensorInfo<int64_t>
 }
 
 #include "generic/kernel.cu"
-#include "THCGenerateAllTypes.h"
+#include "THCGenerateFloatType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateDoubleType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateByteType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateCharType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateShortType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateIntType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateLongType.h"