
Rename ctx to the corresponding dev_ctx,xpu_ctx [fluid_ops] #74513


Merged
merged 1 commit on Aug 11, 2025
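Summary note: this PR is a mechanical rename of the device-context parameter in these legacy fluid kernels from ctx to dev_ctx (per the title, xpu_ctx where an XPU context is used, though no XPU file appears in this portion of the diff); no behavior changes. Taking the first declaration in the diff as an illustration:

// Before
template <typename T, typename Context>
void LessThanRawKernel(const Context& ctx,
                       const DenseTensor& x,
                       const DenseTensor& y,
                       int axis,
                       DenseTensor* out);

// After: only the parameter name changes
template <typename T, typename Context>
void LessThanRawKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const DenseTensor& y,
                       int axis,
                       DenseTensor* out);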
12 changes: 6 additions & 6 deletions paddle/phi/kernels/legacy/compare_kernel.h
@@ -19,42 +19,42 @@ limitations under the License. */
namespace phi {

template <typename T, typename Context>
-void LessThanRawKernel(const Context& ctx,
+void LessThanRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);

template <typename T, typename Context>
-void LessEqualRawKernel(const Context& ctx,
+void LessEqualRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);

template <typename T, typename Context>
-void GreaterThanRawKernel(const Context& ctx,
+void GreaterThanRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);

template <typename T, typename Context>
-void GreaterEqualRawKernel(const Context& ctx,
+void GreaterEqualRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);

template <typename T, typename Context>
-void EqualRawKernel(const Context& ctx,
+void EqualRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);

template <typename T, typename Context>
-void NotEqualRawKernel(const Context& ctx,
+void NotEqualRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
32 changes: 16 additions & 16 deletions paddle/phi/kernels/legacy/cpu/compare_kernel.cc
@@ -25,88 +25,88 @@ template <typename T,
typename Context,
typename Functor,
typename InverseFunctor>
-inline void CompareRawKernelImpl(const Context& ctx,
+inline void CompareRawKernelImpl(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
-ctx.template Alloc<bool>(out);
+dev_ctx.template Alloc<bool>(out);
if (x.dims().size() >= y.dims().size()) {
funcs::ElementwiseCompute<Functor, T, bool>(
-ctx, x, y, Functor(), out, axis);
+dev_ctx, x, y, Functor(), out, axis);
} else {
funcs::ElementwiseCompute<InverseFunctor, T, bool>(
-ctx, x, y, InverseFunctor(), out, axis);
+dev_ctx, x, y, InverseFunctor(), out, axis);
}
}

template <typename T, typename Context>
-void LessThanRawKernel(const Context& ctx,
+void LessThanRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
CompareRawKernelImpl<T,
Context,
funcs::LessThanFunctor<T>,
-funcs::GreaterThanFunctor<T>>(ctx, x, y, axis, out);
+funcs::GreaterThanFunctor<T>>(dev_ctx, x, y, axis, out);
}

template <typename T, typename Context>
-void LessEqualRawKernel(const Context& ctx,
+void LessEqualRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
CompareRawKernelImpl<T,
Context,
funcs::LessEqualFunctor<T>,
-funcs::GreaterEqualFunctor<T>>(ctx, x, y, axis, out);
+funcs::GreaterEqualFunctor<T>>(dev_ctx, x, y, axis, out);
}

template <typename T, typename Context>
-void GreaterThanRawKernel(const Context& ctx,
+void GreaterThanRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
CompareRawKernelImpl<T,
Context,
funcs::GreaterThanFunctor<T>,
-funcs::LessThanFunctor<T>>(ctx, x, y, axis, out);
+funcs::LessThanFunctor<T>>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
-void GreaterEqualRawKernel(const Context& ctx,
+void GreaterEqualRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
CompareRawKernelImpl<T,
Context,
funcs::GreaterEqualFunctor<T>,
-funcs::LessEqualFunctor<T>>(ctx, x, y, axis, out);
+funcs::LessEqualFunctor<T>>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
-void EqualRawKernel(const Context& ctx,
+void EqualRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
CompareRawKernelImpl<T,
Context,
funcs::EqualFunctor<T>,
-funcs::EqualFunctor<T>>(ctx, x, y, axis, out);
+funcs::EqualFunctor<T>>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
-void NotEqualRawKernel(const Context& ctx,
+void NotEqualRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
CompareRawKernelImpl<T,
Context,
funcs::NotEqualFunctor<T>,
-funcs::NotEqualFunctor<T>>(ctx, x, y, axis, out);
+funcs::NotEqualFunctor<T>>(dev_ctx, x, y, axis, out);
}
} // namespace phi
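Reviewer note on the surrounding code (not part of this change): CompareRawKernelImpl above switches to the inverse functor when y has more dimensions than x, presumably so the elementwise broadcast can be driven by the higher-rank operand while still producing x OP y. A minimal standalone sketch of that functor-swap pattern, using hypothetical names and plain std::vector instead of DenseTensor:

#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// Illustrative only (not the phi API): broadcast a scalar against a vector.
// The loop always iterates over the vector; when the scalar is conceptually on
// the left-hand side, we apply the *inverse* comparison so the result is still
// "lhs OP rhs", mirroring the Functor/InverseFunctor switch above.
template <typename T, typename Functor, typename InverseFunctor>
std::vector<bool> broadcast_compare(const std::vector<T>& vec, T scalar,
                                    bool scalar_is_rhs) {
  std::vector<bool> out(vec.size());
  for (std::size_t i = 0; i < vec.size(); ++i) {
    out[i] = scalar_is_rhs ? Functor()(vec[i], scalar)
                           : InverseFunctor()(vec[i], scalar);
  }
  return out;
}

int main() {
  std::vector<int> x = {1, 2, 3};
  // x < 2  -> 1 0 0
  auto a = broadcast_compare<int, std::less<int>, std::greater<int>>(x, 2, true);
  // 2 < x  -> computed as x > 2 -> 0 0 1
  auto b = broadcast_compare<int, std::less<int>, std::greater<int>>(x, 2, false);
  for (bool v : a) std::cout << v << ' ';
  std::cout << '\n';
  for (bool v : b) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}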

55 changes: 28 additions & 27 deletions paddle/phi/kernels/legacy/cpu/legacy_generate_proposals_kernel.cc
@@ -28,7 +28,7 @@ namespace phi {

template <typename T>
std::pair<phi::DenseTensor, phi::DenseTensor> ProposalForOneImage(
-const phi::CPUContext &ctx,
+const phi::CPUContext &dev_ctx,
const phi::DenseTensor &im_info_slice,
const phi::DenseTensor &anchors,
const phi::DenseTensor &variances,
@@ -44,7 +44,7 @@ std::pair<phi::DenseTensor, phi::DenseTensor> ProposalForOneImage(
// Sort index
phi::DenseTensor index_t;
index_t.Resize({scores_slice.numel()});
-int *index = ctx.Alloc<int>(&index_t);
+int *index = dev_ctx.Alloc<int>(&index_t);
for (int i = 0; i < scores_slice.numel(); ++i) {
index[i] = i;
}
@@ -65,64 +65,65 @@ std::pair<phi::DenseTensor, phi::DenseTensor> ProposalForOneImage(
bbox_sel.Resize({index_t.numel(), 4});
anchor_sel.Resize({index_t.numel(), 4});
var_sel.Resize({index_t.numel(), 4});
-ctx.Alloc<T>(&scores_sel);
-ctx.Alloc<T>(&bbox_sel);
-ctx.Alloc<T>(&anchor_sel);
-ctx.Alloc<T>(&var_sel);
+dev_ctx.Alloc<T>(&scores_sel);
+dev_ctx.Alloc<T>(&bbox_sel);
+dev_ctx.Alloc<T>(&anchor_sel);
+dev_ctx.Alloc<T>(&var_sel);

-phi::funcs::CPUGather<T>(ctx, scores_slice, index_t, &scores_sel);
-phi::funcs::CPUGather<T>(ctx, bbox_deltas_slice, index_t, &bbox_sel);
-phi::funcs::CPUGather<T>(ctx, anchors, index_t, &anchor_sel);
-phi::funcs::CPUGather<T>(ctx, variances, index_t, &var_sel);
+phi::funcs::CPUGather<T>(dev_ctx, scores_slice, index_t, &scores_sel);
+phi::funcs::CPUGather<T>(dev_ctx, bbox_deltas_slice, index_t, &bbox_sel);
+phi::funcs::CPUGather<T>(dev_ctx, anchors, index_t, &anchor_sel);
+phi::funcs::CPUGather<T>(dev_ctx, variances, index_t, &var_sel);

phi::DenseTensor proposals;
proposals.Resize({index_t.numel(), 4});
-ctx.Alloc<T>(&proposals);
-phi::funcs::BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals);
+dev_ctx.Alloc<T>(&proposals);
+phi::funcs::BoxCoder<T>(
+dev_ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals);

phi::funcs::ClipTiledBoxes<T>(
-ctx, im_info_slice, proposals, &proposals, false);
+dev_ctx, im_info_slice, proposals, &proposals, false);

phi::DenseTensor keep;
phi::funcs::FilterBoxes<T>(
-ctx, &proposals, min_size, im_info_slice, true, &keep);
+dev_ctx, &proposals, min_size, im_info_slice, true, &keep);
// Handle the case when there is no keep index left
if (keep.numel() == 0) {
phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
bbox_sel.Resize({1, 4});
-ctx.Alloc<T>(&bbox_sel);
-set_zero(ctx, &bbox_sel, static_cast<T>(0));
+dev_ctx.Alloc<T>(&bbox_sel);
+set_zero(dev_ctx, &bbox_sel, static_cast<T>(0));
phi::DenseTensor scores_filter;
scores_filter.Resize({1, 1});
-ctx.Alloc<T>(&scores_filter);
-set_zero(ctx, &scores_filter, static_cast<T>(0));
+dev_ctx.Alloc<T>(&scores_filter);
+set_zero(dev_ctx, &scores_filter, static_cast<T>(0));
return std::make_pair(bbox_sel, scores_filter);
}

phi::DenseTensor scores_filter;
bbox_sel.Resize({keep.numel(), 4});
scores_filter.Resize({keep.numel(), 1});
-ctx.Alloc<T>(&bbox_sel);
-ctx.Alloc<T>(&scores_filter);
-phi::funcs::CPUGather<T>(ctx, proposals, keep, &bbox_sel);
-phi::funcs::CPUGather<T>(ctx, scores_sel, keep, &scores_filter);
+dev_ctx.Alloc<T>(&bbox_sel);
+dev_ctx.Alloc<T>(&scores_filter);
+phi::funcs::CPUGather<T>(dev_ctx, proposals, keep, &bbox_sel);
+phi::funcs::CPUGather<T>(dev_ctx, scores_sel, keep, &scores_filter);
if (nms_thresh <= 0) {
return std::make_pair(bbox_sel, scores_filter);
}

phi::DenseTensor keep_nms =
-phi::funcs::NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta);
+phi::funcs::NMS<T>(dev_ctx, &bbox_sel, &scores_filter, nms_thresh, eta);

if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
keep_nms.Resize({post_nms_top_n});
}

proposals.Resize({keep_nms.numel(), 4});
scores_sel.Resize({keep_nms.numel(), 1});
-ctx.Alloc<T>(&proposals);
-ctx.Alloc<T>(&scores_sel);
-phi::funcs::CPUGather<T>(ctx, bbox_sel, keep_nms, &proposals);
-phi::funcs::CPUGather<T>(ctx, scores_filter, keep_nms, &scores_sel);
+dev_ctx.Alloc<T>(&proposals);
+dev_ctx.Alloc<T>(&scores_sel);
+phi::funcs::CPUGather<T>(dev_ctx, bbox_sel, keep_nms, &proposals);
+phi::funcs::CPUGather<T>(dev_ctx, scores_filter, keep_nms, &scores_sel);

return std::make_pair(proposals, scores_sel);
}
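Background on a helper used repeatedly above (not part of this change): phi::funcs::CPUGather selects rows of a tensor by an integer index tensor, which is how the kernel keeps only the sorted, filtered, or NMS-surviving proposals. A standalone sketch of that row-gather pattern, with illustrative names and plain std::vector rather than DenseTensor:

#include <cstddef>
#include <vector>

// Illustrative row gather: copies rows index[i] of `src`, where `src` is a
// flattened (num_rows x row_width) matrix, into a new (index.size() x
// row_width) matrix.
template <typename T>
std::vector<T> gather_rows(const std::vector<T>& src, std::size_t row_width,
                           const std::vector<int>& index) {
  std::vector<T> out(index.size() * row_width);
  for (std::size_t i = 0; i < index.size(); ++i) {
    const std::size_t src_row = static_cast<std::size_t>(index[i]);
    for (std::size_t j = 0; j < row_width; ++j) {
      out[i * row_width + j] = src[src_row * row_width + j];
    }
  }
  return out;
}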
10 changes: 5 additions & 5 deletions paddle/phi/kernels/legacy/cpu/one_hot_kernel.cc
@@ -25,20 +25,20 @@ struct OneHotV2OpFunctor {
const DenseTensor* in_;
DenseTensor* out_;
int depth_;
-const DeviceContext& ctx_;
+const DeviceContext& dev_ctx_;

OneHotV2OpFunctor(const DenseTensor* in,
DenseTensor* out,
int depth,
-const DeviceContext& ctx)
-: in_(in), out_(out), depth_(depth), ctx_(ctx) {}
+const DeviceContext& dev_ctx)
+: in_(in), out_(out), depth_(depth), dev_ctx_(dev_ctx) {}

template <typename OutT>
void apply() const {
auto* p_in_data = in_->data<InT>();
auto numel = in_->numel();
-auto* p_out_data = ctx_.template Alloc<OutT>(out_);
-funcs::set_constant(ctx_, out_, 0.0);
+auto* p_out_data = dev_ctx_.template Alloc<OutT>(out_);
+funcs::set_constant(dev_ctx_, out_, 0.0);

for (int i = 0; i < numel; ++i) {
PADDLE_ENFORCE_GE(
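Background on what the functor above computes (not part of this diff): one-hot expands each integer label into a depth-sized row containing a single 1, after zero-filling the output, which is the same fill-then-scatter approach OneHotV2OpFunctor uses. A minimal standalone version with hypothetical names:

#include <cassert>
#include <cstddef>
#include <vector>

// Minimal one-hot sketch: each label must lie in [0, depth); row i of the
// (labels.size() x depth) output gets 1.0 at column labels[i], 0.0 elsewhere.
std::vector<float> one_hot(const std::vector<int>& labels, int depth) {
  const std::size_t d = static_cast<std::size_t>(depth);
  std::vector<float> out(labels.size() * d, 0.0f);
  for (std::size_t i = 0; i < labels.size(); ++i) {
    // Analogous to the PADDLE_ENFORCE_GE / range checks in the functor.
    assert(labels[i] >= 0 && labels[i] < depth);
    out[i * d + static_cast<std::size_t>(labels[i])] = 1.0f;
  }
  return out;
}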
14 changes: 7 additions & 7 deletions paddle/phi/kernels/legacy/gpu/layer_norm_cuda_kernel.h
@@ -943,7 +943,7 @@ void HostApplyRMSNorm(V* output,
}

template <typename T, typename Context>
-void cuda_rms_norm(const Context& ctx,
+void cuda_rms_norm(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& scale,
int rows,
@@ -960,7 +960,7 @@ void cuda_rms_norm(const Context& ctx,
cols, \
epsilon, \
const_cast<scalar_t_out*>(scale.data<scalar_t_out>()), \
-ctx.stream())
+dev_ctx.stream())
// scale.dtype() same as y->dtype()
if (scale.dtype() == phi::DataType::FLOAT32) {
DISPATCH_FWD_CASE(float);
Expand All @@ -971,7 +971,7 @@ void cuda_rms_norm(const Context& ctx,
}

template <typename T, typename U, typename V, typename Context>
-void HostRMSNormGradient(const Context& ctx,
+void HostRMSNormGradient(const Context& dev_ctx,
const V* dout,
const U* invvar,
const DenseTensor& input,
@@ -992,7 +992,7 @@ void HostRMSNormGradient(const Context& ctx,
const int nshared2 = nshared2_a > nshared2_b ? nshared2_a : nshared2_b;
auto place = input.place();
DenseTensor part_grad_gamma =
-phi::Empty<float, Context>(ctx, {part_size, n2});
+phi::Empty<float, Context>(dev_ctx, {part_size, n2});
cuComputePartGradGammaBeta<<<blocks2, threads2, nshared2, stream>>>(
dout,
input.data<T>(),
@@ -1038,7 +1038,7 @@ void HostRMSNormGradient(const Context& ctx,
}

template <typename T, typename Context>
-void cuda_rms_norm_gradient(const Context& ctx,
+void cuda_rms_norm_gradient(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& scale,
const DenseTensor& invvar,
@@ -1050,7 +1050,7 @@ void cuda_rms_norm_gradient(const Context& ctx,
DenseTensor* grad_scale) {
#define DISPATCH_BWD_CASE(scalar_t_out) \
HostRMSNormGradient<T, float, scalar_t_out, Context>( \
-ctx, \
+dev_ctx, \
dy.data<scalar_t_out>(), \
invvar.data<float>(), \
x, \
Expand All @@ -1060,7 +1060,7 @@ void cuda_rms_norm_gradient(const Context& ctx,
epsilon, \
grad_x->data<T>(), \
grad_scale->data<scalar_t_out>(), \
-ctx.stream())
+dev_ctx.stream())
if (scale.dtype() == phi::DataType::FLOAT32) {
DISPATCH_BWD_CASE(float);
} else if (scale.dtype() == phi::DataType::BFLOAT16) {
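Side note on the code touched in this last file (not part of this change): the DISPATCH_FWD_CASE / DISPATCH_BWD_CASE macros simply branch on scale.dtype() at runtime and instantiate the host helper with the matching C++ type. A minimal sketch of that dtype-dispatch pattern, with a hypothetical enum and function names standing in for the Paddle types:

#include <cstdint>
#include <stdexcept>

// Hypothetical dtype tag, standing in for phi::DataType.
enum class DType { kFloat32, kBFloat16 };

// Hypothetical 16-bit storage type standing in for bfloat16.
struct BFloat16 { std::uint16_t bits; };

template <typename ScaleT>
void rms_norm_impl(const void* scale, int cols) {
  // A real kernel would reinterpret `scale` as const ScaleT* and launch here.
  (void)scale;
  (void)cols;
}

// Resolve the runtime dtype once, then work with a concrete template type,
// mirroring the DISPATCH_*_CASE(float) / DISPATCH_*_CASE(bfloat16) branches.
void rms_norm_dispatch(DType scale_dtype, const void* scale, int cols) {
  switch (scale_dtype) {
    case DType::kFloat32:
      rms_norm_impl<float>(scale, cols);
      break;
    case DType::kBFloat16:
      rms_norm_impl<BFloat16>(scale, cols);
      break;
    default:
      throw std::runtime_error("unsupported scale dtype");
  }
}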