Skip to content

Commit 938cea2

Browse files
authored
merge CUDA and ROCm codes in op (#2847)
Signed-off-by: Jinzhe Zeng <[email protected]>
1 parent e7a8876 commit 938cea2

11 files changed, with 66 additions (+66) and 608 deletions (-608).

source/lib/include/gpu_cuda.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#define gpuMemcpy cudaMemcpy
1414
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
1515
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
16+
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
1617
#define gpuMemset cudaMemset
1718

1819
#define GPU_MAX_NBOR_SIZE 4096

source/lib/include/gpu_rocm.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#define gpuMemcpy hipMemcpy
1717
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
1818
#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
19+
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
1920
#define gpuMemset hipMemset
2021

2122
#define DPErrcheck(res) \

source/op/gelu_multi_device.cc

Lines changed: 6 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -64,13 +64,9 @@ class GeluOp : public OpKernel {
6464
const int_64 size = static_cast<int_64>(output_tensor->NumElements());
6565

6666
if (device == "GPU") {
67-
#if GOOGLE_CUDA
68-
deepmd::gelu_gpu(out, x, size);
69-
#endif // GOOGLE_CUDA
70-
71-
#if TENSORFLOW_USE_ROCM
67+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
7268
deepmd::gelu_gpu(out, x, size);
73-
#endif // TENSORFLOW_USE_ROCM
69+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
7470
} else if (device == "CPU") {
7571
deepmd::gelu_cpu(out, x, size);
7672
}
@@ -108,13 +104,9 @@ class GeluGradOp : public OpKernel {
108104
const int_64 size = static_cast<int_64>(output_tensor->NumElements());
109105

110106
if (device == "GPU") {
111-
#if GOOGLE_CUDA
112-
deepmd::gelu_grad_gpu(out, x, dy, size);
113-
#endif // GOOGLE_CUDA
114-
115-
#if TENSORFLOW_USE_ROCM
107+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
116108
deepmd::gelu_grad_gpu(out, x, dy, size);
117-
#endif // TENSORFLOW_USE_ROCM
109+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
118110
} else if (device == "CPU") {
119111
deepmd::gelu_grad_cpu(out, x, dy, size);
120112
}
@@ -154,13 +146,9 @@ class GeluGradGradOp : public OpKernel {
154146
const int_64 size = static_cast<int_64>(output_tensor->NumElements());
155147

156148
if (device == "GPU") {
157-
#if GOOGLE_CUDA
158-
deepmd::gelu_grad_grad_gpu(out, x, dy, dy_2, size);
159-
#endif // GOOGLE_CUDA
160-
161-
#if TENSORFLOW_USE_ROCM
149+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
162150
deepmd::gelu_grad_grad_gpu(out, x, dy, dy_2, size);
163-
#endif // TENSORFLOW_USE_ROCM
151+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
164152
} else if (device == "CPU") {
165153
deepmd::gelu_grad_grad_cpu(out, x, dy, dy_2, size);
166154
}

0 commit comments

Comments
 (0)