@@ -3314,30 +3314,40 @@ struct server_context {
33143314 const auto pos_min_thold = std::max (0 , slot.n_past - n_swa);
33153315
33163316 if (pos_min > pos_min_thold) {
3317+ SLT_WRN (slot, " n_past = %d, cache_tokens.size() = %d, seq_id = %d, pos_min = %d, n_swa = %d\n " , slot.n_past , (int ) slot.cache_tokens .size (), slot.id , pos_min, n_swa);
3318+
33173319 // search for a SWA checkpoint
3318- auto it = std::find_if (
3320+ const auto it = std::find_if (
33193321 slot.swa_checkpoints .rbegin (),
33203322 slot.swa_checkpoints .rend (),
33213323 [&](const auto & cur) {
33223324 return cur.pos_min <= pos_min_thold;
33233325 }
33243326 );
33253327
3326- if (it == slot.swa_checkpoints .rend ()) {
3327- SLT_WRN (slot, " n_past = %d, cache_tokens.size() = %d, seq_id = %d, pos_min = %d, n_swa = %d\n " , slot.n_past , (int ) slot.cache_tokens .size (), slot.id , pos_min, n_swa);
3328- SLT_WRN (slot, " forcing full prompt re-processing due to lack of cache data (likely due to SWA, see %s)\n " ,
3329- " https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055" );
3328+ bool do_reset = it == slot.swa_checkpoints .rend ();
33303329
3331- slot.n_past = 0 ;
3332- slot.swa_checkpoints .clear ();
3333- } else {
3330+ if (!do_reset) {
33343331 // restore the checkpoint
33353332 const size_t swa_size = it->data .size ();
3336- llama_state_seq_set_data (ctx, it->data .data (), swa_size, slot.id );
3333+ const size_t n = llama_state_seq_set_data (ctx, it->data .data (), swa_size, slot.id );
3334+
3335+ if (n != swa_size) {
3336+ SLT_ERR (slot, " failed to restore SWA checkpoint, pos_min = %d, pos_max = %d, size = %.3f MiB\n " , it->pos_min , it->pos_max , (float ) swa_size / 1024 / 1024 );
3337+ do_reset = true ;
3338+ } else {
3339+ slot.n_past = std::min (slot.n_past , it->pos_max );
33373340
3338- slot.n_past = std::min (slot.n_past , it->pos_max );
3341+ SLT_WRN (slot, " SWA checkpoint restore, pos_min = %d, pos_max = %d, size = %.3f MiB\n " , it->pos_min , it->pos_max , (float ) swa_size / 1024 / 1024 );
3342+ }
3343+ }
33393344
3340- SLT_WRN (slot, " SWA checkpoint restore, pos_min = %d, pos_max = %d, size = %.3f MiB\n " , it->pos_min , it->pos_max , (float ) swa_size / 1024 / 1024 );
3345+ if (do_reset) {
3346+ SLT_WRN (slot, " forcing full prompt re-processing due to lack of cache data (likely due to SWA, see %s)\n " ,
3347+ " https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055" );
3348+
3349+ slot.n_past = 0 ;
3350+ slot.swa_checkpoints .clear ();
33413351 }
33423352 }
33433353 }
0 commit comments