
Commit 3dc8834

Author: Markus Kliegl (committed)

conv shift op: change to CamelCase

1 parent: 92b0c69

File tree: 1 file changed (+13 −13 lines)


paddle/operators/conv_shift_op.cu

Lines changed: 13 additions & 13 deletions
@@ -22,7 +22,7 @@ using framework::Tensor;
 
 namespace {
 
-inline int div_up(int x, int y) { return (x + y - 1) / y; }
+inline int DivUp(int x, int y) { return (x + y - 1) / y; }
 
 // Some notes on the design:
 //
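Note: `DivUp` is the usual ceiling-division idiom for sizing a CUDA grid, rounding up so that a partial final block still gets launched. A minimal standalone check of its behavior (the `main` harness below is ours for illustration, not part of the file):

```cpp
#include <cassert>

inline int DivUp(int x, int y) { return (x + y - 1) / y; }

int main() {
  assert(DivUp(512, 256) == 2);  // exact multiple of the block size
  assert(DivUp(513, 256) == 3);  // a remainder rounds the block count up
  assert(DivUp(1, 256) == 1);    // any positive x gets at least one block
}
```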
@@ -33,9 +33,9 @@ inline int div_up(int x, int y) { return (x + y - 1) / y; }
 // y is fairly small. For large y, it would probably be more efficient
 // to also tile across y.
 template <typename T>
-__global__ void conv_shift_forward(const T *x, const T *y, T *out, int x_width,
-                                   int y_width, int y_half_width,
-                                   int batch_size) {
+__global__ void ConvShiftForward(const T *x, const T *y, T *out, int x_width,
+                                 int y_width, int y_half_width,
+                                 int batch_size) {
   extern __shared__ T mem[];
 
   int tx = threadIdx.x;
@@ -79,8 +79,8 @@ __global__ void conv_shift_forward(const T *x, const T *y, T *out, int x_width,
 
 // Compute x gradient - initial naive implementation with atomic add.
 template <typename T>
-__global__ void conv_shift_dx(const T *dout, const T *y, T *dx, int x_width,
-                              int y_width, int y_half_width, int batch_size) {
+__global__ void ConvShiftGradX(const T *dout, const T *y, T *dx, int x_width,
+                               int y_width, int y_half_width, int batch_size) {
   int i = blockIdx.x * blockDim.x + threadIdx.x;  // x index
   int j = blockIdx.y;                             // y index
   int k = blockIdx.z;                             // batch index
@@ -94,8 +94,8 @@ __global__ void conv_shift_dx(const T *dout, const T *y, T *dx, int x_width,
 
 // Compute y gradient - initial naive implementation with atomic add.
 template <typename T>
-__global__ void conv_shift_dy(const T *x, const T *dout, T *dy, int x_width,
-                              int y_width, int y_half_width, int batch_size) {
+__global__ void ConvShiftDy(const T *x, const T *dout, T *dy, int x_width,
+                            int y_width, int y_half_width, int batch_size) {
   int i = blockIdx.x * blockDim.x + threadIdx.x;  // x index
   int j = blockIdx.y;                             // y index
   int k = blockIdx.z;                             // batch index
@@ -125,14 +125,14 @@ class ConvShiftKernel<platform::GPUPlace, T> : public framework::OpKernel<T> {
     int y_half_width = (y_width - 1) / 2;
 
     const int x_per_block = 256;
-    int num_x_blocks = div_up(x_width, x_per_block);
+    int num_x_blocks = DivUp(x_width, x_per_block);
     int mem_per_block = (x_per_block + 2 * y_width) * sizeof(T);
 
     dim3 grid_dim(num_x_blocks, batch_size);
 
     auto stream = context.cuda_device_context().stream();
 
-    conv_shift_forward<T><<<grid_dim, x_per_block, mem_per_block, stream>>>(
+    ConvShiftForward<T><<<grid_dim, x_per_block, mem_per_block, stream>>>(
         x_data, y_data, out_data, x_width, y_width, y_half_width, batch_size);
   }
 };
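Note: the third `<<<...>>>` launch argument (`mem_per_block` here) is the dynamic shared memory size in bytes that backs the kernel's unsized `extern __shared__ T mem[];` declaration; the extra `2 * y_width` elements leave room for a halo around each 256-element x tile. A hedged sketch of the same pairing, with a simpler kernel and names of our own:

```cpp
#include <cuda_runtime.h>

// The unsized extern array is backed by whatever byte count the launch
// passes as its third <<<...>>> argument.
__global__ void TileKernel(const float *x, float *out, int n) {
  extern __shared__ float tile[];
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) tile[threadIdx.x] = x[i];  // stage this block's tile
  __syncthreads();
  if (i < n) out[i] = tile[threadIdx.x] * 2.0f;
}

void Launch(const float *x, float *out, int n, cudaStream_t stream) {
  const int block = 256;
  int grid = (n + block - 1) / block;   // same DivUp idiom as the diff
  size_t smem = block * sizeof(float);  // bytes for extern __shared__
  TileKernel<<<grid, block, smem, stream>>>(x, out, n);
}
```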
@@ -160,20 +160,20 @@ class ConvShiftGradKernel<platform::GPUPlace, T>
     auto stream = context.cuda_device_context().stream();
 
     const int x_per_block = 256;
-    int num_x_blocks = div_up(x_width, x_per_block);
+    int num_x_blocks = DivUp(x_width, x_per_block);
     dim3 grid_dim(num_x_blocks, y_width, batch_size);
 
     if (dX) {
       T *dx_data = dX->mutable_data<T>(context.GetPlace());
       cudaMemsetAsync(dx_data, 0, dX->numel() * sizeof(T), stream);
-      conv_shift_dx<T><<<grid_dim, x_per_block, 0, stream>>>(
+      ConvShiftGradX<T><<<grid_dim, x_per_block, 0, stream>>>(
           dout_data, y_data, dx_data, x_width, y_width, y_half_width,
           batch_size);
     }
     if (dY) {
       T *dy_data = dY->mutable_data<T>(context.GetPlace());
       cudaMemsetAsync(dy_data, 0, dY->numel() * sizeof(T), stream);
-      conv_shift_dy<T><<<grid_dim, x_per_block, 0, stream>>>(
+      ConvShiftDy<T><<<grid_dim, x_per_block, 0, stream>>>(
           x_data, dout_data, dy_data, x_width, y_width, y_half_width,
           batch_size);
     }
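Note: since the gradient kernels accumulate with `atomicAdd`, the buffers must start at zero; enqueueing `cudaMemsetAsync` on the same stream as the kernel keeps the two ordered without any host-side synchronization. A minimal sketch of that zero-then-accumulate ordering (kernel and function names are ours, not Paddle's):

```cpp
#include <cuda_runtime.h>

// Stand-in for a gradient kernel: every thread accumulates into grad.
__global__ void AddOne(float *grad, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) atomicAdd(&grad[i], 1.0f);
}

// Both operations go on `stream`, so the memset is guaranteed to finish
// before the kernel reads or writes the buffer.
void ZeroThenAccumulate(float *grad, int n, cudaStream_t stream) {
  cudaMemsetAsync(grad, 0, n * sizeof(float), stream);
  const int block = 256;
  AddOne<<<(n + block - 1) / block, block, 0, stream>>>(grad, n);
}
```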
