
Commit 9a589de

cherry-pick: change softmax_with_cross_entropy_op's parameter name from softmax_switch to use_softmax (#32750)

* change parameter name from softmax_switch to use_softmax, test=develop
* cherry-pick: change parameter name from softmax_switch to use_softmax, test=develop
1 parent 0bb079c commit 9a589de

File tree: 5 files changed, +56 −59 lines changed

paddle/fluid/operators/softmax_with_cross_entropy_op.cc

Lines changed: 3 additions & 4 deletions
@@ -55,7 +55,7 @@ class SoftmaxWithCrossEntropyOpMaker
         "the given labels as soft labels.")
         .SetDefault(false);
     AddAttr<bool>(
-        "softmax_switch",
+        "use_softmax",
         "(bool, default: true), A flag to indicate whether to do softmax ")
         .SetDefault(true);
     AddAttr<bool>(
@@ -320,7 +320,6 @@ REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad,
 REGISTER_OP_VERSION(softmax_with_cross_entropy)
     .AddCheckpoint(
         R"ROC(
-              Add a new attribute [softmax_switch] )ROC",
+              Add a new attribute [use_softmax] )ROC",
         paddle::framework::compatible::OpVersionDesc().NewAttr(
-            "softmax_switch", "A flag to indicate whether to do softmax",
-            true));
+            "use_softmax", "A flag to indicate whether to do softmax", true));

paddle/fluid/operators/softmax_with_cross_entropy_op.cu

Lines changed: 4 additions & 4 deletions
@@ -772,10 +772,10 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
         platform::is_gpu_place(context.GetPlace()), true,
         platform::errors::Unavailable("softmax_with_cross_entropy operator's "
                                       "CUDA kernel only runs on GPU device."));
-    const bool softmax_switch = context.Attr<bool>("softmax_switch");
+    const bool use_softmax = context.Attr<bool>("use_softmax");

     // do not with softmax op, and input is softmax
-    if (!softmax_switch) {
+    if (!use_softmax) {
       const Tensor* softmax = context.Input<Tensor>("Logits");
       const Tensor* labels = context.Input<Tensor>("Label");
       Tensor* softmax_out = context.Output<Tensor>("Softmax");
@@ -925,10 +925,10 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
     int block = 512;
     auto stream = context.cuda_device_context().stream();
     auto ignore_index = context.Attr<int>("ignore_index");
-    auto softmax_switch = context.Attr<bool>("softmax_switch");
+    auto use_softmax = context.Attr<bool>("use_softmax");

     // do not with softmax op, and input is softmax
-    if (!softmax_switch) {
+    if (!use_softmax) {
       if (context.Attr<bool>("soft_label")) {
         int grid = (n * d + block - 1) / block;
         const T* label_data = labels->data<T>();
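
One detail in the grad hunk worth spelling out: the launch configuration grid = (n * d + block - 1) / block is ceiling division, choosing the smallest number of 512-thread blocks that covers all n * d elements. A minimal standalone check (hypothetical sizes, not from this commit):

#include <cstdio>

// Ceiling division used above to size the CUDA grid: the smallest number
// of blocks of `block` threads that covers `total` elements.
int grid_for(int total, int block) { return (total + block - 1) / block; }

int main() {
  const int n = 128, d = 1000, block = 512;
  printf("grid = %d\n", grid_for(n * d, block));  // prints "grid = 250"
  return 0;
}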

paddle/fluid/operators/softmax_with_cross_entropy_op.h

Lines changed: 8 additions & 8 deletions
@@ -31,10 +31,10 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(
         platform::is_cpu_place(context.GetPlace()), true,
         platform::errors::Unimplemented("This kernel only runs on CPU."));
-    const bool softmax_switch = context.Attr<bool>("softmax_switch");
+    const bool use_softmax = context.Attr<bool>("use_softmax");

     // do not with softmax op, and input is softmax
-    if (!softmax_switch) {
+    if (!use_softmax) {
       const Tensor* softmax = context.Input<Tensor>("Logits");
       const Tensor* labels = context.Input<Tensor>("Label");
       Tensor* softmax_out = context.Output<Tensor>("Softmax");
@@ -113,9 +113,9 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
         context.Output<Tensor>(framework::GradVarName("Logits"));

     const Tensor* softmax = context.Input<Tensor>("Softmax");
-    const bool softmax_switch = context.Attr<bool>("softmax_switch");
+    const bool use_softmax = context.Attr<bool>("use_softmax");

-    if (logit_grad != softmax || !softmax_switch) {
+    if (logit_grad != softmax || !use_softmax) {
       framework::TensorCopy(*softmax, context.GetPlace(),
                             context.device_context(), logit_grad);
     }
@@ -138,8 +138,8 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
     auto logit_grad_mat = framework::EigenMatrix<T>::From(logit_grad_2d);
     auto& place = *context.template device_context<platform::CPUDeviceContext>()
                        .eigen_device();
-    if (!softmax_switch) {
-      // softmax_switch step1
+    if (!use_softmax) {
+      // use_softmax step1
       if (soft_label) {
         auto lbl_mat = framework::EigenMatrix<T>::From(labels_2d);
         logit_grad_mat.device(place) =
@@ -148,7 +148,7 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
             out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, axis_dim)) *
             logit_grad_mat;
       }
-      // softmax_switch step2
+      // use_softmax step2
       else {
         const int64_t* label_data = labels->data<int64_t>();
         T* logit_grad_data = logit_grad->data<T>();
@@ -181,7 +181,7 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
       return;
     }

-    // for softmax_switch=False, continue
+    // for use_softmax=False, continue

     if (soft_label) {
       // when soft_label = True, ignore_index is not supported
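
The two branches of the CPU grad kernel compute different derivatives because the forward pass differs. With use_softmax=true the usual softmax-cross-entropy gradient applies; with use_softmax=false the gradient is taken directly with respect to the input probabilities. A standalone sketch for one row with a hard label (illustrative C++, not the kernel's Eigen code):

#include <cmath>

// use_softmax == true: loss = -log(softmax(logits)[label]), so
// d(loss)/d(logit_i) = softmax_i - 1{i == label}, scaled by the upstream grad.
void grad_wrt_logits(const double* softmax, int n, int label, double out_grad,
                     double* logit_grad) {
  for (int i = 0; i < n; ++i)
    logit_grad[i] = out_grad * (softmax[i] - (i == label ? 1.0 : 0.0));
}

// use_softmax == false: loss = -log(probs[label]), so
// d(loss)/d(prob_i) = -1{i == label} / prob_i, scaled by the upstream grad.
void grad_wrt_probs(const double* probs, int n, int label, double out_grad,
                    double* prob_grad) {
  for (int i = 0; i < n; ++i)
    prob_grad[i] = (i == label) ? -out_grad / probs[i] : 0.0;
}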
