Skip to content

Commit c00237f

Browse files
fix_affine_grid_cuda13 (PaddlePaddle#76367)
1 parent d3f7e14 commit c00237f

File tree

4 files changed

+73
-53
lines changed

4 files changed

+73
-53
lines changed

paddle/phi/kernels/funcs/affine_grid_utils.cu

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ namespace phi {
2525
namespace funcs {
2626

2727
template <typename T>
28-
__global__ void CreateBaseGridKernel_4D(
28+
__global__ void CreateBaseGridKernel_4D_Kernel(
2929
T* base_grid_data, int64_t n, int64_t h, int64_t w, bool align_corners) {
3030
int64_t total_elements = n * h * w;
3131
CUDA_KERNEL_LOOP(idx, total_elements) {
@@ -78,12 +78,12 @@ __global__ void CreateBaseGridKernel_4D(
7878
}
7979

8080
template <typename T>
81-
__global__ void CreateBaseGridKernel_5D(T* base_grid_data,
82-
int64_t n,
83-
int64_t d,
84-
int64_t h,
85-
int64_t w,
86-
bool align_corners) {
81+
__global__ void CreateBaseGridKernel_5D_Kernel(T* base_grid_data,
82+
int64_t n,
83+
int64_t d,
84+
int64_t h,
85+
int64_t w,
86+
bool align_corners) {
8787
int64_t total_elements = n * d * h * w;
8888
CUDA_KERNEL_LOOP(idx, total_elements) {
8989
int64_t w_idx = idx % w;
@@ -155,14 +155,46 @@ __global__ void CreateBaseGridKernel_5D(T* base_grid_data,
155155
}
156156
}
157157

158-
template __global__ void CreateBaseGridKernel_4D<float>(
159-
float*, int64_t, int64_t, int64_t, bool);
160-
template __global__ void CreateBaseGridKernel_4D<double>(
161-
double*, int64_t, int64_t, int64_t, bool);
158+
template <typename T, typename Context>
159+
void CreateBaseGridKernel_4D(const Context& dev_ctx,
160+
T* base_grid_data,
161+
int64_t n,
162+
int64_t h,
163+
int64_t w,
164+
bool align_corners) {
165+
int64_t total_elements = n * h * w;
166+
auto stream = dev_ctx.stream();
167+
int64_t block_size = 512;
168+
int64_t grid_size = (total_elements + block_size - 1) / block_size;
169+
CreateBaseGridKernel_4D_Kernel<T><<<grid_size, block_size, 0, stream>>>(
170+
base_grid_data, n, h, w, align_corners);
171+
}
172+
173+
template <typename T, typename Context>
174+
void CreateBaseGridKernel_5D(const Context& dev_ctx,
175+
T* base_grid_data,
176+
int64_t n,
177+
int64_t d,
178+
int64_t h,
179+
int64_t w,
180+
bool align_corners) {
181+
int64_t total_elements = n * d * h * w;
182+
auto stream = dev_ctx.stream();
183+
int64_t block_size = 512;
184+
int64_t grid_size = (total_elements + block_size - 1) / block_size;
185+
CreateBaseGridKernel_5D_Kernel<T><<<grid_size, block_size, 0, stream>>>(
186+
base_grid_data, n, d, h, w, align_corners);
187+
}
188+
189+
template void CreateBaseGridKernel_4D<float, phi::GPUContext>(
190+
const phi::GPUContext&, float*, int64_t, int64_t, int64_t, bool);
191+
template void CreateBaseGridKernel_4D<double, phi::GPUContext>(
192+
const phi::GPUContext&, double*, int64_t, int64_t, int64_t, bool);
193+
194+
template void CreateBaseGridKernel_5D<float, phi::GPUContext>(
195+
const phi::GPUContext&, float*, int64_t, int64_t, int64_t, int64_t, bool);
196+
template void CreateBaseGridKernel_5D<double, phi::GPUContext>(
197+
const phi::GPUContext&, double*, int64_t, int64_t, int64_t, int64_t, bool);
162198

163-
template __global__ void CreateBaseGridKernel_5D<float>(
164-
float*, int64_t, int64_t, int64_t, int64_t, bool);
165-
template __global__ void CreateBaseGridKernel_5D<double>(
166-
double*, int64_t, int64_t, int64_t, int64_t, bool);
167199
} // namespace funcs
168200
} // namespace phi

paddle/phi/kernels/funcs/affine_grid_utils.h

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#pragma once
1616

1717
#include "paddle/phi/core/dense_tensor.h"
18+
#include "paddle/phi/core/device_context.h"
1819
#include "paddle/phi/kernels/funcs/blas/blas.h"
1920
#include "paddle/phi/kernels/funcs/eigen/common.h"
2021
#include "paddle/phi/kernels/funcs/math_function.h"
@@ -184,16 +185,23 @@ inline void GetIdxMap5D(int n,
184185

185186
namespace funcs {
186187
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
187-
template <typename T>
188-
__global__ void CreateBaseGridKernel_4D(
189-
T* base_grid_data, int64_t n, int64_t h, int64_t w, bool align_corners);
190-
template <typename T>
191-
__global__ void CreateBaseGridKernel_5D(T* base_grid_data,
192-
int64_t n,
193-
int64_t d,
194-
int64_t h,
195-
int64_t w,
196-
bool align_corners);
188+
189+
template <typename T, typename Context>
190+
void CreateBaseGridKernel_4D(const Context& dev_ctx,
191+
T* base_grid_data,
192+
int64_t n,
193+
int64_t h,
194+
int64_t w,
195+
bool align_corners);
196+
197+
template <typename T, typename Context>
198+
void CreateBaseGridKernel_5D(const Context& dev_ctx,
199+
T* base_grid_data,
200+
int64_t n,
201+
int64_t d,
202+
int64_t h,
203+
int64_t w,
204+
bool align_corners);
197205
#endif
198206
} // namespace funcs
199207

paddle/phi/kernels/gpu/affine_grid_grad_kernel.cu

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,8 @@ void AffineGridGrad4DCUDAKernel(const Context& dev_ctx,
6060
base_grid.Resize(common::make_ddim({n, h, w, 3}));
6161
T* base_grid_data = dev_ctx.template Alloc<T>(&base_grid);
6262

63-
int64_t total_elements = n * h * w;
64-
auto stream = dev_ctx.stream();
65-
int64_t block_size = 512;
66-
int64_t grid_size = (total_elements + block_size - 1) / block_size;
67-
68-
phi::funcs::CreateBaseGridKernel_4D<T><<<grid_size, block_size, 0, stream>>>(
69-
base_grid_data, n, h, w, align_corners);
63+
phi::funcs::CreateBaseGridKernel_4D<T, Context>(
64+
dev_ctx, base_grid_data, n, h, w, align_corners);
7065

7166
// 2. Reshaping base_grid to [N, H * W, 3]
7267
DenseTensor base_grid_reshaped;
@@ -127,13 +122,8 @@ void AffineGridGrad5DCUDAKernel(const Context& dev_ctx,
127122
base_grid.Resize(common::make_ddim({n, d, h, w, 4}));
128123
T* base_grid_data = dev_ctx.template Alloc<T>(&base_grid);
129124

130-
int64_t total_elements = n * d * h * w;
131-
auto stream = dev_ctx.stream();
132-
int64_t block_size = 512;
133-
int64_t grid_size = (total_elements + block_size - 1) / block_size;
134-
135-
phi::funcs::CreateBaseGridKernel_5D<T><<<grid_size, block_size, 0, stream>>>(
136-
base_grid_data, n, d, h, w, align_corners);
125+
phi::funcs::CreateBaseGridKernel_5D<T, Context>(
126+
dev_ctx, base_grid_data, n, d, h, w, align_corners);
137127

138128
// 2. Reshaping base_grid to [N, D * H * W, 4]
139129
DenseTensor base_grid_reshaped;

paddle/phi/kernels/gpu/affine_grid_kernel.cu

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,8 @@ void AffineGrid4DCUDAKernel(const Context& dev_ctx,
5353
base_grid.Resize(common::make_ddim({n, h, w, 3}));
5454
T* base_grid_data = dev_ctx.template Alloc<T>(&base_grid);
5555

56-
int64_t total_elements = n * h * w;
57-
auto stream = dev_ctx.stream();
58-
int64_t block_size = 512;
59-
int64_t grid_size = (total_elements + block_size - 1) / block_size;
60-
61-
phi::funcs::CreateBaseGridKernel_4D<T><<<grid_size, block_size, 0, stream>>>(
62-
base_grid_data, n, h, w, align_corners);
56+
phi::funcs::CreateBaseGridKernel_4D<T, Context>(
57+
dev_ctx, base_grid_data, n, h, w, align_corners);
6358

6459
// Apply affine transformation
6560
DenseTensor base_grid_new;
@@ -107,13 +102,8 @@ void AffineGrid5DCUDAKernel(const Context& dev_ctx,
107102
base_grid.Resize(common::make_ddim({n, d, h, w, 4}));
108103
T* base_grid_data = dev_ctx.template Alloc<T>(&base_grid);
109104

110-
int64_t total_elements = n * d * h * w;
111-
auto stream = dev_ctx.stream();
112-
int64_t block_size = 512;
113-
int64_t grid_size = (total_elements + block_size - 1) / block_size;
114-
115-
phi::funcs::CreateBaseGridKernel_5D<T><<<grid_size, block_size, 0, stream>>>(
116-
base_grid_data, n, d, h, w, align_corners);
105+
phi::funcs::CreateBaseGridKernel_5D<T, Context>(
106+
dev_ctx, base_grid_data, n, d, h, w, align_corners);
117107

118108
// Apply affine transformation
119109
DenseTensor base_grid_new;

0 commit comments

Comments
 (0)