@@ -739,12 +739,13 @@ PD_BUILD_STATIC_OP(append_attention)
              paddle::Optional("out_linear_shifts"),
              paddle::Optional("out_linear_smooths"),
              paddle::Optional("kv_signal_data"),
-             paddle::Optional("q_norm_weight"),
-             paddle::Optional("k_norm_weight")})
+             paddle::Optional("q_norm_weight"),
+             paddle::Optional("k_norm_weight")})
     .Outputs({"fmha_out", "qkv_out", "key_cache_out", "value_cache_out"})
     .SetInplaceMap({{"key_cache", "key_cache_out"},
                     {"value_cache", "value_cache_out"}})
-    .Attrs({"compute_type: std::string",
+    .Attrs({"rms_norm_eps: float",
+            "compute_type: std::string",
             "cache_quant_type: std::string",
             "use_neox_rotary_style: bool",
             "rope_3d: bool",
@@ -759,7 +760,7 @@ PD_BUILD_STATIC_OP(append_attention)
             "speculate_max_draft_token_num: int",
             "causal: bool",
             "speculate_decoder: bool",
-            "rms_norm_eps: float"})
+            })
     .SetKernelFn(PD_KERNEL(AppendAttention))
     .SetInferShapeFn(PD_INFER_SHAPE(AppendAttentionInferShape))
     .SetInferDtypeFn(PD_INFER_DTYPE(AppendAttentionInferDtype));
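Context for the reordering above: in Paddle's custom-op API, the attributes declared via .Attrs() are handed to the function bound with PD_KERNEL after the tensor inputs, in declaration order, so moving "rms_norm_eps: float" from the end of the list to the front implies a matching move of that parameter in the AppendAttention signature (not shown in this hunk). Below is a minimal sketch of that convention with a hypothetical toy op; the names ToyAttention and toy_attention are illustrative and not part of this change, and this is an assumption about why the attribute was reordered, not the actual AppendAttention code.

#include <string>
#include <vector>

#include "paddle/extension.h"

// Hypothetical toy op (not the real AppendAttention): tensor inputs are
// passed to the kernel first, then the attributes in .Attrs() order, so
// "rms_norm_eps: float" declared first arrives before compute_type.
std::vector<paddle::Tensor> ToyAttention(const paddle::Tensor& qkv,
                                         const float rms_norm_eps,
                                         const std::string& compute_type) {
  // Kernel body elided; just forward the input unchanged.
  return {qkv};
}

PD_BUILD_OP(toy_attention)
    .Inputs({"qkv"})
    .Outputs({"out"})
    .Attrs({"rms_norm_eps: float",         // declaration order here must
            "compute_type: std::string"})  // mirror the kernel parameters
    .SetKernelFn(PD_KERNEL(ToyAttention));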