diff --git a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
index 20f130cb37208..f03d7a160e104 100644
--- a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
@@ -57,7 +57,7 @@ bool validateReduceOpAttrs(const Node* node,
     EXPECT_TRUE(
         !PADDLE_GET_CONST(bool, op->GetAttr("reduce_all")),
         ::paddle::string::Sprintf(
-            "The LayerNorm fusion %s"
+            "The LayerNorm fusion %s "
             "reduction must have \'reduce_all\' attribute set to false.",
             name));
   }
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 5d504c71ff103..fa0df97f219b2 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1420,7 +1420,7 @@ struct SimpleOpTypeSetTeller : public Teller {
 #endif
       if (dtype != -1 && dtype != 2 && dtype != 3 && dtype != 5 && dtype != 6) {
         VLOG(3)
-            << "the fill_any_like only supports int32/int64/float32/float64 by"
+            << "the fill_any_like only supports int32/int64/float32/float64 by "
               "trt8.4 below";
         return false;
       }
diff --git a/paddle/fluid/operators/fused/fused_adam_op.cc b/paddle/fluid/operators/fused/fused_adam_op.cc
index 932bdbfd90a6c..7a890e3e96150 100644
--- a/paddle/fluid/operators/fused/fused_adam_op.cc
+++ b/paddle/fluid/operators/fused/fused_adam_op.cc
@@ -115,7 +115,7 @@ class FusedAdamOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(0);
     AddAttr<bool>("use_adamw",
                   "(bool, default False) "
-                  "Whether to use AdamW"
+                  "Whether to use AdamW. "
                   "True for decoupled weight decay")
         .SetDefault(false);
     AddAttr<bool>("multi_precision",
diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc
index c69e9f9849739..fc58a32ef7c0a 100644
--- a/paddle/fluid/operators/fused/fused_attention_op.cc
+++ b/paddle/fluid/operators/fused/fused_attention_op.cc
@@ -124,9 +124,9 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(y_dim.size(),
                         2,
                         common::errors::InvalidArgument(
-                            "The dimensions of qkv_weight must be 2 if enable"
-                            "transpose_qkv_wb: (dim_embed, 3 * dim_embed),"
-                            "but received dimensions of"
+                            "The dimensions of qkv_weight must be 2 if enable "
+                            "transpose_qkv_wb: (dim_embed, 3 * dim_embed), "
+                            "but received dimensions of "
                             "Input is [%d]",
                             y_dim.size()));
       PADDLE_ENFORCE_GT(num_heads,
@@ -159,7 +159,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(y_dim.size(),
                         4,
                         common::errors::InvalidArgument(
-                            "The dimensions of qkv_weight must be 4 if not"
+                            "The dimensions of qkv_weight must be 4 if not "
                             "enable transpose_qkv_wb: (3, num_head, dim_head, "
                             "dim_embed), but received [%d]",
                             y_dim.size()));
@@ -186,8 +186,8 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
         x_dim.size(),
         3,
         common::errors::InvalidArgument("The dimensions of x must be 3"
-                                        "(batch_size, seq_len, dim_embed),"
-                                        "but received dimensions of"
+                                        "(batch_size, seq_len, dim_embed), "
+                                        "but received dimensions of "
                                         "Input is [%d]",
                                         x_dim.size()));
 
@@ -431,7 +431,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker {
         "attn_dropout_implementation",
         "[\"downgrade_in_infer\"|\"upscale_in_train\"]"
         "There are two kinds of ways to implement dropout"
-        "(the mask below is a tensor have the same shape with input"
+        "(the mask below is a tensor have the same shape with input, "
         "the value of mask is 0 or 1, the ratio of 0 is dropout_rate)"
         "1. downgrade_in_infer(default), downgrade the outcome at inference "
         "time"
diff --git a/paddle/fluid/operators/fused/fused_conv2d_op.cc b/paddle/fluid/operators/fused/fused_conv2d_op.cc
index 04d2d4043bf96..fb7bb428ef24b 100644
--- a/paddle/fluid/operators/fused/fused_conv2d_op.cc
+++ b/paddle/fluid/operators/fused/fused_conv2d_op.cc
@@ -53,13 +53,13 @@ TODO: Documentation of conv2d op.
  protected:
   void Apply() {
     AddInput("Bias",
-             "(Tensor) Bias to be added to each output of filter application."
-             "The format of output tensor is X (one-dimensional) of size equal"
+             "(Tensor) Bias to be added to each output of filter application. "
+             "The format of output tensor is X (one-dimensional) of size equal "
              "to the number of output channels. Only used with MKL-DNN.")
         .AsDispensable();
     AddInput("ResidualData",
              "(Tensor) Tensor with residual data "
-             "to which convolution output will be added."
+             "to which convolution output will be added. "
              "Used with fuse_residual_connection fusion.")
         .AsDispensable();
     AddAttr<std::string>("fuse_activation",
diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc
index 28a87239f3769..c4a1ce652c905 100644
--- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc
+++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc
@@ -72,16 +72,16 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel {
         x_dim.size(),
         3,
         common::errors::InvalidArgument("The dimensions of x must be 3"
-                                        "(batch_size, seq_len, dim_embed),"
-                                        "but received dimensions of"
+                                        "(batch_size, seq_len, dim_embed), "
+                                        "but received dimensions of "
                                         "Input is [%d]",
                                         x_dim.size()));
     PADDLE_ENFORCE_EQ(
         y_dim.size(),
         4,
         common::errors::InvalidArgument("The dimensions of qkv_weight must be 4"
-                                        "(3, num_head, dim_head, dim_embed),"
-                                        "but received dimensions of"
+                                        "(3, num_head, dim_head, dim_embed), "
+                                        "but received dimensions of "
                                         "Input is [%d]",
                                         y_dim.size()));
     PADDLE_ENFORCE_EQ(
diff --git a/paddle/fluid/operators/fused/multi_gru_op.cc b/paddle/fluid/operators/fused/multi_gru_op.cc
index 93c688d149ac7..d2a262e2bac76 100644
--- a/paddle/fluid/operators/fused/multi_gru_op.cc
+++ b/paddle/fluid/operators/fused/multi_gru_op.cc
@@ -64,7 +64,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const {
         wx_dims[i][0],
         x_mat_dims[1],
         common::errors::InvalidArgument(
-            "The first dimension of flattened WeightX #%d"
+            "The first dimension of flattened WeightX #%d "
             "should equal to last dimension of flattened input X, but "
             "received fattened WeightX dimension is:%d, flattened X dimension "
             "is:%d",
@@ -205,7 +205,7 @@ void MultiGRUOpMaker::Make() {
           "Number of stacked GRU layers.")
      .SetDefault(1);
  AddAttr<bool>("origin_mode",
-                "bool"
+                "bool "
                "use origin mode in article https://arxiv.org/abs/1412.3555")
      .SetDefault(false);
  AddAttr<std::string>(
diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
index 88737990847f3..cef735b1fdac8 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
@@ -50,7 +50,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "The input tensor of sequence_mask op.");
     AddOutput("Y", "The output mask of sequence_mask op.");
     AddInput("MaxLenTensor",
-             "Max length tensor"
+             "Max length tensor "
              "have higher priority than maxlen attribute")
         .AsDispensable();
     AddAttr<int>("maxlen",
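
All of the hunks above fix the same class of bug: adjacent C++ string literals are concatenated by the compiler with no separator, so a literal that does not end in a space runs straight into the next one and the resulting error or attribute message loses its word boundary. A minimal standalone sketch of the effect, illustrative only and not part of the patch:

// Sketch: adjacent string literals concatenate with no separator between them.
#include <cstdio>

int main() {
  // Without a trailing space the two literals fuse into "...%sreduction ...".
  const char* broken =
      "The LayerNorm fusion %s"
      "reduction must have 'reduce_all' attribute set to false.";
  // With the trailing space (the pattern added throughout the patch) the
  // message keeps its word boundary: "...%s reduction ...".
  const char* fixed =
      "The LayerNorm fusion %s "
      "reduction must have 'reduce_all' attribute set to false.";
  std::printf("%s\n", broken);
  std::printf("%s\n", fixed);
  return 0;
}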