@@ -3750,10 +3750,10 @@ struct llama_context_params llama_context_default_params() {
37503750 /* .logits_all =*/ false ,
37513751 /* .embeddings =*/ false ,
37523752 /* .offload_kqv =*/ true ,
3753- /* .flash_attn =*/ false ,
3753+ /* .flash_attn =*/ true ,
37543754 /* .mla_attn =*/ 0 ,
37553755 /* .attn_max_batch =*/ 0 ,
3756- /* .fused_moe_up_gate =*/ false ,
3756+ /* .fused_moe_up_gate =*/ true ,
37573757 /* .grouped_expert_routing =*/ false ,
37583758 /* .fused_up_gate =*/ true ,
37593759 /* .fused_mmad =*/ true ,
@@ -4040,19 +4040,19 @@ struct llama_context * llama_new_context_with_model(
40404040 cparams.mla_attn = 0 ;
40414041 }
40424042
4043- LLAMA_LOG_INFO (" %s: n_ctx = %u\n " , __func__, cparams.n_ctx );
4044- LLAMA_LOG_INFO (" %s: n_batch = %u\n " , __func__, cparams.n_batch );
4045- LLAMA_LOG_INFO (" %s: n_ubatch = %u\n " , __func__, cparams.n_ubatch );
4046- LLAMA_LOG_INFO (" %s: flash_attn = %d\n " , __func__, cparams.flash_attn );
4047- LLAMA_LOG_INFO (" %s: mla_attn = %d\n " , __func__, cparams.mla_attn );
4048- LLAMA_LOG_INFO (" %s: attn_max_b = %d\n " , __func__, cparams.attn_max_batch );
4049- LLAMA_LOG_INFO (" %s: fused_moe = %d\n " , __func__, cparams.fused_moe_up_gate );
4050- LLAMA_LOG_INFO (" %s: grouped er = %d\n " , __func__, cparams.grouped_expert_routing );
4043+ LLAMA_LOG_INFO (" %s: n_ctx = %u\n " , __func__, cparams.n_ctx );
4044+ LLAMA_LOG_INFO (" %s: n_batch = %u\n " , __func__, cparams.n_batch );
4045+ LLAMA_LOG_INFO (" %s: n_ubatch = %u\n " , __func__, cparams.n_ubatch );
4046+ LLAMA_LOG_INFO (" %s: flash_attn = %d\n " , __func__, cparams.flash_attn );
4047+ LLAMA_LOG_INFO (" %s: mla_attn = %d\n " , __func__, cparams.mla_attn );
4048+ LLAMA_LOG_INFO (" %s: attn_max_b = %d\n " , __func__, cparams.attn_max_batch );
4049+ LLAMA_LOG_INFO (" %s: fused_moe = %d\n " , __func__, cparams.fused_moe_up_gate );
4050+ LLAMA_LOG_INFO (" %s: grouped er = %d\n " , __func__, cparams.grouped_expert_routing );
40514051 LLAMA_LOG_INFO (" %s: fused_up_gate = %d\n " , __func__, cparams.fused_up_gate );
4052- LLAMA_LOG_INFO (" %s: fused_mmad = %d\n " , __func__, cparams.fused_mmad );
4053- LLAMA_LOG_INFO (" %s: ser = %d, %g\n " , __func__, cparams.min_experts , cparams.thresh_experts );
4054- LLAMA_LOG_INFO (" %s: freq_base = %.1f\n " , __func__, cparams.rope_freq_base );
4055- LLAMA_LOG_INFO (" %s: freq_scale = %g\n " , __func__, cparams.rope_freq_scale );
4052+ LLAMA_LOG_INFO (" %s: fused_mmad = %d\n " , __func__, cparams.fused_mmad );
4053+ LLAMA_LOG_INFO (" %s: ser = %d, %g\n " , __func__, cparams.min_experts , cparams.thresh_experts );
4054+ LLAMA_LOG_INFO (" %s: freq_base = %.1f\n " , __func__, cparams.rope_freq_base );
4055+ LLAMA_LOG_INFO (" %s: freq_scale = %g\n " , __func__, cparams.rope_freq_scale );
40564056
40574057 ctx->abort_callback = params.abort_callback ;
40584058 ctx->abort_callback_data = params.abort_callback_data ;
0 commit comments