@@ -1057,7 +1057,7 @@ PD_BUILD_STATIC_OP(append_attention)
              paddle::Optional("kv_signal_data"),
              paddle::Optional("q_norm_weight"),
              paddle::Optional("k_norm_weight")})
-    .Outputs({"fmha_out", "qkv_out", "key_cache_out", "value_cache_out"})
+    .Outputs({"fmha_out", "key_cache_out", "value_cache_out"})
     .SetInplaceMap({{"key_cache", "key_cache_out"},
                     {"value_cache", "value_cache_out"}})
     .Attrs({"rms_norm_eps: float",
@@ -1123,7 +1123,8 @@ PD_BUILD_STATIC_OP(append_attention_with_output)
     .SetInplaceMap({{"fmha_out", "fmha_out_out"},
                     {"key_cache", "key_cache_out"},
                     {"value_cache", "value_cache_out"}})
-    .Attrs({"compute_type: std::string",
+    .Attrs({"rms_norm_eps: float",
+            "compute_type: std::string",
             "cache_quant_type: std::string",
             "use_neox_rotary_style: bool",
             "rope_3d: bool",
@@ -1138,7 +1139,7 @@ PD_BUILD_STATIC_OP(append_attention_with_output)
             "speculate_max_draft_token_num: int",
             "causal: bool",
             "speculate_decoder: bool",
-            "rms_norm_eps: float"})
+            })
     .SetKernelFn(PD_KERNEL(AppendAttentionWithOutput))
     .SetInferShapeFn(PD_INFER_SHAPE(AppendAttentionWithOutputInferShape))
     .SetInferDtypeFn(PD_INFER_DTYPE(AppendAttentionWithOutputInferDtype));