3131#include < unordered_map>
3232#include < unordered_set>
3333
34- #define SERVER_MAX_SWA_CHECKPOINTS_PER_SLOT 3
35-
3634using json = nlohmann::ordered_json;
3735
3836constexpr int HTTP_POLLING_SECONDS = 1 ;
@@ -3579,12 +3577,13 @@ struct server_context {
35793577
35803578 // make a checkpoint with the SWA memory
35813579 // checkpoints are needed only if we are not using "--swa-full"
3582- if (llama_model_n_swa (model) > 0 && !params_base.swa_full ) {
3583- if (slot.swa_checkpoints .size () >= SERVER_MAX_SWA_CHECKPOINTS_PER_SLOT ) {
3580+ if (llama_model_n_swa (model) > 0 && !params_base.swa_full && params_base. n_swa_checkpoints > 0 ) {
3581+ if (slot.swa_checkpoints .size () >= ( size_t ) params_base. n_swa_checkpoints ) {
35843582 {
35853583 const auto & cur = slot.swa_checkpoints .back ();
35863584
3587- SLT_WRN (slot, " SWA checkpoint erase, pos_min = %d, pos_max = %d, size = %.3f MiB\n " , cur.pos_min , cur.pos_max , (float ) cur.data .size () / 1024 / 1024 );
3585+ SLT_WRN (slot, " SWA checkpoint erase, pos_min = %d, pos_max = %d, size = %.3f MiB\n " ,
3586+ cur.pos_min , cur.pos_max , (float ) cur.data .size () / 1024 / 1024 );
35883587 }
35893588
35903589 slot.swa_checkpoints .erase (slot.swa_checkpoints .begin ());
@@ -3600,7 +3599,13 @@ struct server_context {
36003599
36013600 llama_state_seq_get_data_ext (ctx, cur.data .data (), swa_size, slot.id , LLAMA_STATE_SEQ_FLAGS_SWA_ONLY);
36023601
3603- SLT_WRN (slot, " SWA checkpoint create, pos_min = %d, pos_max = %d, size = %.3f MiB\n " , cur.pos_min , cur.pos_max , (float ) swa_size / 1024 / 1024 );
3602+ float size_total = 0 .0f ;
3603+ for (const auto & checkpoint : slot.swa_checkpoints ) {
3604+ size_total += (float ) checkpoint.data .size () / 1024 / 1024 ;
3605+ }
3606+
3607+ SLT_WRN (slot, " SWA checkpoint create, pos_min = %d, pos_max = %d, size = %.3f MiB, total = %d/%d (%.3f MiB)\n " ,
3608+ cur.pos_min , cur.pos_max , (float ) cur.data .size () / 1024 / 1024 , (int ) slot.swa_checkpoints .size (), params_base.n_swa_checkpoints , size_total);
36043609 }
36053610 } else if (slot.state != SLOT_STATE_GENERATING) {
36063611 continue ; // continue loop of slots
0 commit comments