@@ -3081,7 +3081,6 @@ struct server_context {
                 // without pooling, we want to output the embeddings for all the tokens in the batch
                 const bool need_embd = slot.task_type == SERVER_TASK_TYPE_EMBEDDING && llama_pooling_type(slot.ctx) == LLAMA_POOLING_TYPE_NONE;

-                // batch.add_text(prompt_tokens[slot.n_past], slot.n_past, slot.id, need_embd);
                 llama_batch_ext_add_text(batch.get(), prompt_tokens[slot.n_past], slot.n_past, &slot.id, 1, need_embd);

                 if (slot.params.cache_prompt) {
@@ -3109,7 +3108,6 @@ struct server_context {
            }

            // extract the logits only for the last token
-           // batch.set_logits_last();
            llama_batch_ext_set_output_last(batch.get());

            slot.n_decoded = 0;
@@ -3280,13 +3278,10 @@ struct server_context {
            }

            // construct the speculation batch
-           // slot.batch_spec.clear();
-           // slot.batch_spec.add_text(id, slot.n_past, slot.id, true);
            llama_batch_ext_clear(slot.batch_spec.get());
            llama_batch_ext_add_text(slot.batch_spec.get(), id, slot.n_past, &slot.id, 1, true);

            for (size_t i = 0; i < draft.size(); ++i) {
-               // slot.batch_spec.add_text(draft[i], slot.n_past + 1 + i, slot.id, true);
                llama_batch_ext_add_text(slot.batch_spec.get(), draft[i], slot.n_past + 1 + i, &slot.id, 1, true);
            }

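For context, all three hunks make the same cleanup: commented-out calls to the old C++ batch wrapper (`batch.add_text(...)`, `batch.set_logits_last()`, `slot.batch_spec.clear()`) are deleted, leaving only the direct `llama_batch_ext_*` C calls that already replaced them. Below is a minimal sketch of the resulting call pattern; only the three functions and their 6-argument/1-argument shapes visible in the diff are taken from the source, while the helper name, the `llama_batch_ext` type spelling, and the surrounding setup are illustrative assumptions.

```cpp
#include <vector>

#include "llama.h"

// Hypothetical helper mirroring the call pattern in the diff above:
// clear the batch, append tokens at consecutive positions for one
// sequence, then request output only for the last token.
static void fill_batch(llama_batch_ext * batch,
                       const std::vector<llama_token> & tokens,
                       llama_pos pos0, llama_seq_id seq_id) {
    // start from an empty batch (cf. the speculation-batch hunk)
    llama_batch_ext_clear(batch);

    for (size_t i = 0; i < tokens.size(); ++i) {
        // (batch, token, position, seq-id array, n seq-ids, output flag)
        // matches the argument shape used at every call site above
        llama_batch_ext_add_text(batch, tokens[i], pos0 + (llama_pos) i, &seq_id, 1, false);
    }

    // equivalent of the removed batch.set_logits_last(): extract
    // logits/embeddings only for the final token after decode
    llama_batch_ext_set_output_last(batch);
}
```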