Skip to content

Commit 0860049

Browse files
Merge pull request #52 from CCInc/atomicAdd
added atomicadd implementation (fix #50)
2 parents 93f3a2a + 461a8e3 commit 0860049

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

cuda/include/cuda_utils.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,28 @@ inline dim3 opt_block_config(int x, int y)
2929
return block_config;
3030
}
3131

32+
// from https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
33+
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
/**
 * Software fallback for double-precision atomicAdd.
 *
 * Compiled only for device passes targeting compute capability below 6.0;
 * in every other case (host pass, or SM60 and newer) nothing is defined here
 * and the toolkit's own atomicAdd(double*, double) is used instead.
 *
 * The addition is emulated with a 64-bit compare-and-swap loop: the current
 * bit pattern is read, the sum is computed, and atomicCAS publishes the sum
 * only if the location still holds the pattern that was read. If another
 * thread modified the location in between, the loop retries with the value
 * atomicCAS observed.
 *
 * Declared `static __inline__` because this definition lives in a header:
 * each translation unit that includes it gets its own private copy, which
 * avoids duplicate-symbol errors when several .cu files include the header
 * (notably under relocatable device code).
 *
 * @param address  Location of the double to update.
 * @param val      Value to add to *address.
 * @return         The value stored at *address immediately before this
 *                 thread's addition was applied.
 */
static __inline__ __device__ double atomicAdd(double* address, double val)
{
    // atomicCAS has no double overload, so operate on the raw 64-bit pattern.
    unsigned long long int* const address_as_ull =
        reinterpret_cast<unsigned long long int*>(address);
    unsigned long long int old = *address_as_ull;
    unsigned long long int assumed;

    do
    {
        assumed = old;
        // Store (val + current) only if the location still equals `assumed`;
        // atomicCAS returns whatever value it actually found there.
        old = atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));

        // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
    } while (assumed != old);

    return __longlong_as_double(old);
}
#endif
53+
3254
#define CUDA_CHECK_ERRORS() \
3355
do \
3456
{ \

cuda/src/chamfer_dist.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <torch/extension.h>
44

55
#include <vector>
6+
#include "cuda_utils.h"
67

78
template <typename scalar_t>
89
__global__ void chamfer_dist_kernel(int batch_size, int n, const scalar_t* __restrict__ xyz1, int m,

0 commit comments

Comments
 (0)