diff --git a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
index 20f130cb37208..f03d7a160e104 100644
--- a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
@@ -57,7 +57,7 @@ bool validateReduceOpAttrs(const Node* node,
     EXPECT_TRUE(
         !PADDLE_GET_CONST(bool, op->GetAttr("reduce_all")),
         ::paddle::string::Sprintf(
-            "The LayerNorm fusion %s"
+            "The LayerNorm fusion %s "
             "reduction must have \'reduce_all\' attribute set to false.",
             name));
   }
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 5d504c71ff103..fa0df97f219b2 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1420,7 +1420,7 @@ struct SimpleOpTypeSetTeller : public Teller {
 #endif
       if (dtype != -1 && dtype != 2 && dtype != 3 && dtype != 5 && dtype != 6) {
         VLOG(3)
-            << "the fill_any_like only supports int32/int64/float32/float64 by"
+            << "the fill_any_like only supports int32/int64/float32/float64 by "
               "trt8.4 below";
         return false;
       }
diff --git a/paddle/fluid/operators/fused/fused_adam_op.cc b/paddle/fluid/operators/fused/fused_adam_op.cc
index 932bdbfd90a6c..7a890e3e96150 100644
--- a/paddle/fluid/operators/fused/fused_adam_op.cc
+++ b/paddle/fluid/operators/fused/fused_adam_op.cc
@@ -115,7 +115,7 @@ class FusedAdamOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(0);
     AddAttr<bool>("use_adamw",
                   "(bool, default False) "
-                  "Whether to use AdamW"
+                  "Whether to use AdamW. "
                   "True for decoupled weight decay")
         .SetDefault(false);
     AddAttr<bool>("multi_precision",
diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc
index c69e9f9849739..fc58a32ef7c0a 100644
--- a/paddle/fluid/operators/fused/fused_attention_op.cc
+++ b/paddle/fluid/operators/fused/fused_attention_op.cc
@@ -124,9 +124,9 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(y_dim.size(),
                         2,
                         common::errors::InvalidArgument(
-                            "The dimensions of qkv_weight must be 2 if enable"
-                            "transpose_qkv_wb: (dim_embed, 3 * dim_embed),"
-                            "but received dimensions of"
+                            "The dimensions of qkv_weight must be 2 if enable "
+                            "transpose_qkv_wb: (dim_embed, 3 * dim_embed), "
+                            "but received dimensions of "
                             "Input is [%d]",
                             y_dim.size()));
       PADDLE_ENFORCE_GT(num_heads,
@@ -159,7 +159,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(y_dim.size(),
                         4,
                         common::errors::InvalidArgument(
-                            "The dimensions of qkv_weight must be 4 if not"
+                            "The dimensions of qkv_weight must be 4 if not "
                             "enable transpose_qkv_wb: (3, num_head, dim_head, "
                             "dim_embed), but received [%d]",
                             y_dim.size()));
@@ -186,8 +186,8 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
         x_dim.size(),
         3,
         common::errors::InvalidArgument("The dimensions of x must be 3"
-                                        "(batch_size, seq_len, dim_embed),"
-                                        "but received dimensions of"
+                                        "(batch_size, seq_len, dim_embed), "
+                                        "but received dimensions of "
                                         "Input is [%d]",
                                         x_dim.size()));
 
@@ -431,7 +431,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker {
         "attn_dropout_implementation",
         "[\"downgrade_in_infer\"|\"upscale_in_train\"]"
         "There are two kinds of ways to implement dropout"
-        "(the mask below is a tensor have the same shape with input"
+        "(the mask below is a tensor have the same shape with input, "
         "the value of mask is 0 or 1, the ratio of 0 is dropout_rate)"
         "1. downgrade_in_infer(default), downgrade the outcome at inference "
         "time"
diff --git a/paddle/fluid/operators/fused/fused_conv2d_op.cc b/paddle/fluid/operators/fused/fused_conv2d_op.cc
index 04d2d4043bf96..fb7bb428ef24b 100644
--- a/paddle/fluid/operators/fused/fused_conv2d_op.cc
+++ b/paddle/fluid/operators/fused/fused_conv2d_op.cc
@@ -53,13 +53,13 @@ TODO: Documentation of conv2d op.
  protected:
   void Apply() {
     AddInput("Bias",
-             "(Tensor) Bias to be added to each output of filter application."
-             "The format of output tensor is X (one-dimensional) of size equal"
+             "(Tensor) Bias to be added to each output of filter application. "
+             "The format of output tensor is X (one-dimensional) of size equal "
              "to the number of output channels. Only used with MKL-DNN.")
         .AsDispensable();
     AddInput("ResidualData",
              "(Tensor) Tensor with residual data "
-             "to which convolution output will be added."
+             "to which convolution output will be added. "
              "Used with fuse_residual_connection fusion.")
         .AsDispensable();
     AddAttr<std::string>("fuse_activation",
diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc
index 28a87239f3769..c4a1ce652c905 100644
--- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc
+++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc
@@ -72,16 +72,16 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel {
         x_dim.size(),
         3,
         common::errors::InvalidArgument("The dimensions of x must be 3"
-                                        "(batch_size, seq_len, dim_embed),"
-                                        "but received dimensions of"
+                                        "(batch_size, seq_len, dim_embed), "
+                                        "but received dimensions of "
                                         "Input is [%d]",
                                         x_dim.size()));
     PADDLE_ENFORCE_EQ(
         y_dim.size(),
         4,
         common::errors::InvalidArgument("The dimensions of qkv_weight must be 4"
-                                        "(3, num_head, dim_head, dim_embed),"
-                                        "but received dimensions of"
+                                        "(3, num_head, dim_head, dim_embed), "
+                                        "but received dimensions of "
                                         "Input is [%d]",
                                         y_dim.size()));
     PADDLE_ENFORCE_EQ(
diff --git a/paddle/fluid/operators/fused/multi_gru_op.cc b/paddle/fluid/operators/fused/multi_gru_op.cc
index 93c688d149ac7..d2a262e2bac76 100644
--- a/paddle/fluid/operators/fused/multi_gru_op.cc
+++ b/paddle/fluid/operators/fused/multi_gru_op.cc
@@ -64,7 +64,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const {
         wx_dims[i][0],
         x_mat_dims[1],
         common::errors::InvalidArgument(
-            "The first dimension of flattened WeightX #%d"
+            "The first dimension of flattened WeightX #%d "
             "should equal to last dimension of flattened input X, but "
             "received fattened WeightX dimension is:%d, flattened X dimension "
             "is:%d",
@@ -205,7 +205,7 @@ void MultiGRUOpMaker::Make() {
           "Number of stacked GRU layers.")
      .SetDefault(1);
  AddAttr<bool>("origin_mode",
-                "bool"
+                "bool "
                "use origin mode in article https://arxiv.org/abs/1412.3555")
      .SetDefault(false);
  AddAttr<std::string>(
diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
index 88737990847f3..cef735b1fdac8 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
@@ -50,7 +50,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "The input tensor of sequence_mask op.");
     AddOutput("Y", "The output mask of sequence_mask op.");
     AddInput("MaxLenTensor",
-             "Max length tensor"
+             "Max length tensor "
              "have higher priority than maxlen attribute")
         .AsDispensable();
     AddAttr<int>("maxlen",
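
All of the hunks above fix the same class of bug: adjacent C++ string literals are concatenated by the compiler with no separator, so a literal that does not end in a space runs straight into the next one and the resulting error or attribute message loses its word boundary. A minimal standalone sketch of the effect, illustrative only and not part of the patch:

// Sketch: adjacent string literals concatenate with no separator between them.
#include <cstdio>

int main() {
  // Without a trailing space the two literals fuse into "...%sreduction ...".
  const char* broken =
      "The LayerNorm fusion %s"
      "reduction must have 'reduce_all' attribute set to false.";
  // With the trailing space (the pattern added throughout the patch) the
  // message keeps its word boundary: "...%s reduction ...".
  const char* fixed =
      "The LayerNorm fusion %s "
      "reduction must have 'reduce_all' attribute set to false.";
  std::printf("%s\n", broken);
  std::printf("%s\n", fixed);
  return 0;
}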