Skip to content

Commit e05a82d

Browse files
co63oc authored and Enigmatisms committed
Rename ctx to dev_ctx in paddle/phi/kernels/ [fluid_ops] (PaddlePaddle#74479)
1 parent 2f77d94 commit e05a82d

File tree

10 files changed

+106
-71
lines changed

10 files changed

+106
-71
lines changed

paddle/phi/kernels/autotune/auto_tune_base.h

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class AutoTuneBase {
6363
}
6464

6565
template <typename Context, typename... Args>
66-
void Run(const Context& ctx,
66+
void Run(const Context& dev_ctx,
6767
const AlgorithmType& algo,
6868
const size_t key,
6969
Args&&... args) {
@@ -78,7 +78,7 @@ class AutoTuneBase {
7878
if (use_autotune) {
7979
// All available kernels have ran while picking the best kernel,
8080
// so there may be no need for another kernel run.
81-
auto best_idx = PickBestKernel(ctx, args...);
81+
auto best_idx = PickBestKernel(dev_ctx, args...);
8282
cache.Set(key, best_idx);
8383
} else {
8484
kernels_[0].Run(args...);
@@ -100,14 +100,14 @@ class AutoTuneBase {
100100
}
101101

102102
template <typename Context, typename... Args>
103-
size_t PickBestKernel(const Context& ctx, Args&&... args) {
103+
size_t PickBestKernel(const Context& dev_ctx, Args&&... args) {
104104
std::lock_guard<std::mutex> lock(mutex_);
105105
size_t best_idx = 0;
106106
float min_time = std::numeric_limits<float>::max();
107107

108108
// Time cost test established in default stream.
109109
for (size_t i = 0; i < kernels_.size(); ++i) {
110-
auto time = RunAndMeasureKernel<Context>(ctx, i, args...);
110+
auto time = RunAndMeasureKernel<Context>(dev_ctx, i, args...);
111111
if (time < min_time) {
112112
min_time = time;
113113
best_idx = i;
@@ -118,15 +118,17 @@ class AutoTuneBase {
118118
}
119119

120120
template <typename Context, typename... Args>
121-
float RunAndMeasureKernel(const Context& ctx, const int idx, Args&&... args) {
121+
float RunAndMeasureKernel(const Context& dev_ctx,
122+
const int idx,
123+
Args&&... args) {
122124
// Regard 1st run as warmup, judge the compare result by the time cost
123125
// of rest cycles.
124126
constexpr int repeats = 11;
125127
phi::GpuTimer timer;
126128
float time_cost = 0;
127-
const auto& stream = ctx.stream();
129+
const auto& stream = dev_ctx.stream();
128130

129-
ctx.Wait();
131+
dev_ctx.Wait();
130132
for (int i = 0; i < repeats; ++i) {
131133
timer.Start(stream);
132134
kernels_[idx].Run(args...);
@@ -158,7 +160,7 @@ class MatmulAutoTuner
158160
}
159161

160162
template <typename Context>
161-
void Run(const Context& ctx, const size_t key, Args... args) {
163+
void Run(const Context& dev_ctx, const size_t key, Args... args) {
162164
this->is_init_ = true;
163165
this->CheckKernelSize();
164166
auto& cache = AutoTuneCache::Instance().GetMatmul();
@@ -168,7 +170,7 @@ class MatmulAutoTuner
168170
} else {
169171
bool use_autotune = AutoTuneStatus::Instance().UseAutoTune();
170172
if (use_autotune) {
171-
auto best_idx = this->PickBestKernel(ctx, args...);
173+
auto best_idx = this->PickBestKernel(dev_ctx, args...);
172174
cache.Set(key, best_idx);
173175
} else {
174176
this->kernels_[0].Run(args...);
@@ -210,7 +212,7 @@ class GatherGemmScatterAutoTuner
210212
return instance.get();
211213
}
212214

213-
void Run(const phi::GPUContext& ctx,
215+
void Run(const phi::GPUContext& dev_ctx,
214216
const size_t key,
215217
T const alpha,
216218
T const beta,
@@ -227,15 +229,15 @@ class GatherGemmScatterAutoTuner
227229
} else {
228230
// Set alpha to 0 and beta to 1 to avoid changing the value of d when
229231
// picking the best kernel
230-
auto best_idx =
231-
PickBestKernel(ctx, static_cast<T>(0), static_cast<T>(1), args...);
232+
auto best_idx = PickBestKernel(
233+
dev_ctx, static_cast<T>(0), static_cast<T>(1), args...);
232234
cache.Set(key, best_idx);
233235
this->kernels_[best_idx].Run(alpha, beta, args...);
234236
}
235237
}
236238

237239
protected:
238-
size_t PickBestKernel(const phi::GPUContext& ctx,
240+
size_t PickBestKernel(const phi::GPUContext& dev_ctx,
239241
const T& alpha,
240242
const T& beta,
241243
Args&... args) {
@@ -250,7 +252,7 @@ class GatherGemmScatterAutoTuner
250252
// Some kernels may require more shared memory than available, skip these
251253
// kernels.
252254
try {
253-
time = this->RunAndMeasureKernel(ctx, i, alpha, beta, args...);
255+
time = this->RunAndMeasureKernel(dev_ctx, i, alpha, beta, args...);
254256
if (time < min_time) {
255257
min_time = time;
256258
best_idx = i;

paddle/phi/kernels/funcs/cross_entropy.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ struct HardLabelCrossEntropyCPUFunctorImpl {
9393

9494
template <typename DeviceContext, typename T>
9595
void CrossEntropyFunctor<DeviceContext, T>::operator()(
96-
const DeviceContext& ctx,
96+
const DeviceContext& dev_ctx,
9797
phi::DenseTensor* out,
9898
const phi::DenseTensor* prob,
9999
const phi::DenseTensor* labels,
@@ -110,7 +110,7 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(
110110
auto lbl = EigenMatrix<T>::From(*labels);
111111
auto loss = EigenMatrix<T>::From(*out);
112112

113-
loss.device(*ctx.eigen_device()) =
113+
loss.device(*dev_ctx.eigen_device()) =
114114
-((lbl * in.log().unaryExpr(phi::funcs::TolerableValue<T>()))
115115
.reshape(batch_axis_remain)
116116
.sum(Eigen::DSizes<int, 1>(1)));

paddle/phi/kernels/funcs/gather_scatter_functor.cc

Lines changed: 77 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ struct cpu_gather_scatter_functor {
7777
const std::string& method_name,
7878
const func_t& reduce_op,
7979
bool include_self,
80-
const phi::DeviceContext& ctx UNUSED) {
80+
const phi::DeviceContext& dev_ctx UNUSED) {
8181
if (index.numel() == 0) {
8282
return;
8383
}
@@ -237,7 +237,7 @@ void cpu_gather_kernel(phi::DenseTensor self,
237237
const phi::DenseTensor& index,
238238
phi::DenseTensor result,
239239
bool include_self,
240-
const phi::DeviceContext& ctx) {
240+
const phi::DeviceContext& dev_ctx) {
241241
cpu_gather_scatter_functor<tensor_t,
242242
index_t,
243243
/*is_scatter_like=*/false>()(result,
@@ -247,7 +247,7 @@ void cpu_gather_kernel(phi::DenseTensor self,
247247
"gather_out_cpu",
248248
tensor_assign,
249249
include_self,
250-
ctx);
250+
dev_ctx);
251251
}
252252

253253
template <typename tensor_t, typename index_t>
@@ -256,7 +256,7 @@ void cpu_scatter_assign_kernel(phi::DenseTensor self,
256256
const phi::DenseTensor& index,
257257
phi::DenseTensor src,
258258
bool include_self,
259-
const phi::DeviceContext& ctx) {
259+
const phi::DeviceContext& dev_ctx) {
260260
cpu_gather_scatter_functor<tensor_t,
261261
index_t,
262262
/*is_scatter_like=*/true>()(self,
@@ -266,7 +266,7 @@ void cpu_scatter_assign_kernel(phi::DenseTensor self,
266266
"scatter_assign_cpu",
267267
tensor_assign,
268268
include_self,
269-
ctx);
269+
dev_ctx);
270270
}
271271

272272
template <typename tensor_t, typename index_t>
@@ -275,11 +275,17 @@ void cpu_scatter_add_kernel(phi::DenseTensor self,
275275
const phi::DenseTensor& index,
276276
phi::DenseTensor src,
277277
bool include_self,
278-
const phi::DeviceContext& ctx) {
278+
const phi::DeviceContext& dev_ctx) {
279279
cpu_gather_scatter_functor<tensor_t,
280280
index_t,
281-
/*is_scatter_like=*/true>()(
282-
self, dim, index, src, "scatter_add_cpu", reduce_add, include_self, ctx);
281+
/*is_scatter_like=*/true>()(self,
282+
dim,
283+
index,
284+
src,
285+
"scatter_add_cpu",
286+
reduce_add,
287+
include_self,
288+
dev_ctx);
283289
}
284290

285291
template <typename tensor_t, typename index_t>
@@ -288,11 +294,17 @@ void cpu_scatter_mul_kernel(phi::DenseTensor self,
288294
const phi::DenseTensor& index,
289295
phi::DenseTensor src,
290296
bool include_self,
291-
const phi::DeviceContext& ctx) {
297+
const phi::DeviceContext& dev_ctx) {
292298
cpu_gather_scatter_functor<tensor_t,
293299
index_t,
294-
/*is_scatter_like=*/true>()(
295-
self, dim, index, src, "scatter_mul_cpu", reduce_mul, include_self, ctx);
300+
/*is_scatter_like=*/true>()(self,
301+
dim,
302+
index,
303+
src,
304+
"scatter_mul_cpu",
305+
reduce_mul,
306+
include_self,
307+
dev_ctx);
296308
}
297309

298310
template <typename tensor_t, typename index_t>
@@ -301,11 +313,17 @@ void cpu_scatter_mean_kernel(phi::DenseTensor self,
301313
const phi::DenseTensor& index,
302314
phi::DenseTensor src,
303315
bool include_self,
304-
const phi::DeviceContext& ctx) {
316+
const phi::DeviceContext& dev_ctx) {
305317
cpu_gather_scatter_functor<tensor_t,
306318
index_t,
307-
/*is_scatter_like=*/true>()(
308-
self, dim, index, src, "scatter_mean_cpu", reduce_add, include_self, ctx);
319+
/*is_scatter_like=*/true>()(self,
320+
dim,
321+
index,
322+
src,
323+
"scatter_mean_cpu",
324+
reduce_add,
325+
include_self,
326+
dev_ctx);
309327
}
310328

311329
template <typename tensor_t, typename index_t>
@@ -314,11 +332,17 @@ void cpu_scatter_max_kernel(phi::DenseTensor self,
314332
const phi::DenseTensor& index,
315333
phi::DenseTensor src,
316334
bool include_self,
317-
const phi::DeviceContext& ctx) {
335+
const phi::DeviceContext& dev_ctx) {
318336
cpu_gather_scatter_functor<tensor_t,
319337
index_t,
320-
/*is_scatter_like=*/true>()(
321-
self, dim, index, src, "scatter_max_cpu", reduce_max, include_self, ctx);
338+
/*is_scatter_like=*/true>()(self,
339+
dim,
340+
index,
341+
src,
342+
"scatter_max_cpu",
343+
reduce_max,
344+
include_self,
345+
dev_ctx);
322346
}
323347

324348
template <typename tensor_t, typename index_t>
@@ -327,11 +351,17 @@ void cpu_scatter_min_kernel(phi::DenseTensor self,
327351
const phi::DenseTensor& index,
328352
phi::DenseTensor src,
329353
bool include_self,
330-
const phi::DeviceContext& ctx) {
354+
const phi::DeviceContext& dev_ctx) {
331355
cpu_gather_scatter_functor<tensor_t,
332356
index_t,
333-
/*is_scatter_like=*/true>()(
334-
self, dim, index, src, "scatter_min_cpu", reduce_min, include_self, ctx);
357+
/*is_scatter_like=*/true>()(self,
358+
dim,
359+
index,
360+
src,
361+
"scatter_min_cpu",
362+
reduce_min,
363+
include_self,
364+
dev_ctx);
335365
}
336366

337367
template <typename tensor_t, typename index_t>
@@ -340,7 +370,7 @@ void cpu_scatter_input_grad_kernel(phi::DenseTensor self UNUSED,
340370
const phi::DenseTensor& index,
341371
phi::DenseTensor grad,
342372
bool include_self UNUSED,
343-
const phi::DeviceContext& ctx UNUSED) {
373+
const phi::DeviceContext& dev_ctx UNUSED) {
344374
auto* index_data = index.data<index_t>();
345375
auto* grad_data = grad.data<tensor_t>();
346376

@@ -376,16 +406,17 @@ void cpu_scatter_input_grad_kernel(phi::DenseTensor self UNUSED,
376406
}
377407

378408
template <typename tensor_t, typename index_t>
379-
void cpu_scatter_mul_min_max_input_grad_kernel(phi::DenseTensor self UNUSED,
380-
int dim,
381-
const phi::DenseTensor& index,
382-
const phi::DenseTensor& out,
383-
const phi::DenseTensor& x,
384-
const phi::DenseTensor& value,
385-
phi::DenseTensor grad,
386-
const std::string& reduce,
387-
bool include_self UNUSED,
388-
const phi::DeviceContext& ctx) {
409+
void cpu_scatter_mul_min_max_input_grad_kernel(
410+
phi::DenseTensor self UNUSED,
411+
int dim,
412+
const phi::DenseTensor& index,
413+
const phi::DenseTensor& out,
414+
const phi::DenseTensor& x,
415+
const phi::DenseTensor& value,
416+
phi::DenseTensor grad,
417+
const std::string& reduce,
418+
bool include_self UNUSED,
419+
const phi::DeviceContext& dev_ctx) {
389420
auto* index_data = index.data<index_t>();
390421
auto* grad_data = grad.data<tensor_t>();
391422
auto* out_data = out.data<tensor_t>();
@@ -457,7 +488,8 @@ void cpu_scatter_mean_input_grad_kernel(phi::DenseTensor self UNUSED,
457488
const phi::DenseTensor& index,
458489
phi::DenseTensor grad,
459490
bool include_self UNUSED,
460-
const phi::DeviceContext& ctx UNUSED) {
491+
const phi::DeviceContext& dev_ctx
492+
UNUSED) {
461493
auto* index_data = index.data<index_t>();
462494
auto* grad_data = grad.data<tensor_t>();
463495

@@ -504,7 +536,7 @@ void cpu_scatter_value_grad_kernel(phi::DenseTensor self,
504536
const phi::DenseTensor& index,
505537
phi::DenseTensor grad,
506538
bool include_self UNUSED,
507-
const phi::DeviceContext& ctx UNUSED) {
539+
const phi::DeviceContext& dev_ctx UNUSED) {
508540
auto* self_data = self.data<tensor_t>();
509541
auto* index_data = index.data<index_t>();
510542
auto* grad_data = grad.data<tensor_t>();
@@ -564,7 +596,7 @@ void cpu_scatter_add_mean_value_grad_kernel(
564596
phi::DenseTensor grad,
565597
const std::string& reduce,
566598
bool include_self,
567-
const phi::DeviceContext& ctx UNUSED) {
599+
const phi::DeviceContext& dev_ctx UNUSED) {
568600
auto* self_data = self.data<tensor_t>();
569601
auto* index_data = index.data<index_t>();
570602
auto* grad_data = grad.data<tensor_t>();
@@ -643,16 +675,17 @@ void cpu_scatter_add_mean_value_grad_kernel(
643675
}
644676

645677
template <typename tensor_t, typename index_t>
646-
void cpu_scatter_mul_min_max_value_grad_kernel(phi::DenseTensor self,
647-
int dim,
648-
const phi::DenseTensor& index,
649-
const phi::DenseTensor& out,
650-
const phi::DenseTensor& x,
651-
const phi::DenseTensor& value,
652-
phi::DenseTensor grad,
653-
const std::string& reduce,
654-
bool include_self,
655-
const phi::DeviceContext& ctx) {
678+
void cpu_scatter_mul_min_max_value_grad_kernel(
679+
phi::DenseTensor self,
680+
int dim,
681+
const phi::DenseTensor& index,
682+
const phi::DenseTensor& out,
683+
const phi::DenseTensor& x,
684+
const phi::DenseTensor& value,
685+
phi::DenseTensor grad,
686+
const std::string& reduce,
687+
bool include_self,
688+
const phi::DeviceContext& dev_ctx) {
656689
auto* self_data = self.data<tensor_t>();
657690
auto* index_data = index.data<index_t>();
658691
auto* grad_data = grad.data<tensor_t>();

paddle/phi/kernels/funcs/math/cos_sim_functor.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ namespace phi {
1818
namespace math {
1919
template <typename T>
2020
struct CosSimDyFunctor<phi::CPUContext, T> {
21-
void operator()(const phi::CPUContext& ctx,
21+
void operator()(const phi::CPUContext& dev_ctx,
2222
const T* x_norm,
2323
const T* y_norm,
2424
const T* x,

paddle/phi/kernels/funcs/math/cos_sim_functor.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ __global__ void CosSimDyKernel(const T* x_norm,
5050

5151
template <typename T>
5252
struct CosSimDyFunctor<phi::GPUContext, T> {
53-
void operator()(const phi::GPUContext& ctx,
53+
void operator()(const phi::GPUContext& dev_ctx,
5454
const T* x_norm,
5555
const T* y_norm,
5656
const T* x,
@@ -63,7 +63,7 @@ struct CosSimDyFunctor<phi::GPUContext, T> {
6363
const int block_size = 512;
6464
dim3 threads(block_size, 1);
6565
dim3 grid((rows + block_size - 1) / block_size, 1);
66-
CosSimDyKernel<T><<<grid, threads, 0, ctx.stream()>>>(
66+
CosSimDyKernel<T><<<grid, threads, 0, dev_ctx.stream()>>>(
6767
x_norm, y_norm, x, y, z, dz, rows, cols, dy);
6868
}
6969
};

0 commit comments

Comments (0)