2 files changed: +1 -9 lines changed
lines changed Original file line number Diff line number Diff line change @@ -158,8 +158,6 @@ int main(int argc, char ** argv) {
158158
159159 LOG_INF (" \n\n " );
160160
161- const int n_ctx = llama_n_ctx (ctx);
162-
163161 std::vector<client> clients (n_clients);
164162 for (size_t i = 0 ; i < clients.size (); ++i) {
165163 auto & client = clients[i];
Original file line number | Diff line number | Diff line change
@@ -1959,13 +1959,7 @@ struct server_context {
19591959
19601960 // the update_slots() logic will always submit a maximum of n_batch or n_parallel tokens
19611961 // note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
1962- {
1963- const int32_t n_batch = llama_n_batch (ctx);
1964-
1965- // only a single seq_id per token is needed
1966- batch.clear ();
1967- }
1968-
1962+ batch.clear ();
19691963 metrics.init ();
19701964 }
19711965
You can’t perform that action at this time.
0 commit comments