@@ -99,6 +99,7 @@ struct slot_params {
9999
100100 uint32_t seed = -1 ; // RNG seed
101101 int32_t n_keep = 0 ; // number of tokens to keep from initial prompt
102+ int32_t n_discard = 0 ; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
102103 int32_t n_predict = -1 ; // new tokens to predict
103104
104105 std::vector<std::string> antiprompt;
@@ -846,6 +847,7 @@ struct server_context {
846847 slot.sparams .mirostat_eta = json_value (data, " mirostat_eta" , default_sparams.mirostat_eta );
847848 slot.sparams .penalize_nl = json_value (data, " penalize_nl" , default_sparams.penalize_nl );
848849 slot.params .n_keep = json_value (data, " n_keep" , slot.params .n_keep );
850+ slot.params .n_discard = json_value (data, " n_discard" , default_params.n_discard );
849851 slot.params .seed = json_value (data, " seed" , default_params.seed );
850852 slot.sparams .n_probs = json_value (data, " n_probs" , default_sparams.n_probs );
851853 slot.sparams .min_keep = json_value (data, " min_keep" , default_sparams.min_keep );
@@ -1253,6 +1255,7 @@ struct server_context {
12531255 {" stop" , slot.params .antiprompt },
12541256 {" n_predict" , slot.params .n_predict }, // TODO: fix duplicate key n_predict
12551257 {" n_keep" , slot.params .n_keep },
1258+ {" n_discard" , slot.params .n_discard },
12561259 {" ignore_eos" , ignore_eos},
12571260 {" stream" , slot.params .stream },
12581261 {" logit_bias" , slot.sparams .logit_bias },
@@ -1696,7 +1699,7 @@ struct server_context {
16961699 // Shift context
16971700 const int n_keep = slot.params .n_keep + add_bos_token;
16981701 const int n_left = (int ) system_tokens.size () + slot.n_past - n_keep;
1699- const int n_discard = n_left / 2 ;
1702+ const int n_discard = slot. params . n_discard ? slot. params . n_discard : ( n_left / 2 ) ;
17001703
17011704 LOG_INFO (" slot context shift" , {
17021705 {" id_slot" , slot.id },
0 commit comments