Skip to content

Commit 0d4ff95

Browse files
committed
can shift
1 parent c44de8a commit 0d4ff95

File tree

2 files changed

+19
-12
lines changed

2 files changed

+19
-12
lines changed

examples/server/server.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,9 +1671,8 @@ struct server_response {
16711671
}
16721672

16731673
void add_waiting_tasks(const std::vector<server_task> & tasks) {
1674-
std::unique_lock<std::mutex> lock(mutex_results);
1675-
16761674
for (const auto & task : tasks) {
1675+
std::unique_lock<std::mutex> lock(mutex_results);
16771676
SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", task.id, (int) waiting_task_ids.size());
16781677
waiting_task_ids.insert(task.id);
16791678
}
@@ -1683,20 +1682,24 @@ struct server_response {
16831682
void remove_waiting_task_id(int id_task) {
16841683
SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size());
16851684

1686-
std::unique_lock<std::mutex> lock(mutex_results);
1687-
waiting_task_ids.erase(id_task);
1685+
{
1686+
std::unique_lock<std::mutex> lock(mutex_results);
1687+
waiting_task_ids.erase(id_task);
1688+
}
16881689
// make sure to clean up all pending results
1689-
queue_results.erase(
1690-
std::remove_if(queue_results.begin(), queue_results.end(), [id_task](const server_task_result_ptr & res) {
1691-
return res->id == id_task;
1692-
}),
1693-
queue_results.end());
1690+
{
1691+
std::unique_lock<std::mutex> lock(mutex_results);
1692+
queue_results.erase(
1693+
std::remove_if(queue_results.begin(), queue_results.end(), [id_task](const server_task_result_ptr & res) {
1694+
return res->id == id_task;
1695+
}),
1696+
queue_results.end());
1697+
}
16941698
}
16951699

16961700
void remove_waiting_task_ids(const std::unordered_set<int> & id_tasks) {
1697-
std::unique_lock<std::mutex> lock(mutex_results);
1698-
16991701
for (const auto & id_task : id_tasks) {
1702+
std::unique_lock<std::mutex> lock(mutex_results);
17001703
SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size());
17011704
waiting_task_ids.erase(id_task);
17021705
}
@@ -3841,6 +3844,10 @@ int main(int argc, char ** argv) {
38413844
// TODO: this log can become very long, put it behind a flag or think about a more compact format
38423845
//SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
38433846

3847+
if (prompt.contains("chat_history")) {
3848+
return;
3849+
}
3850+
38443851
std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
38453852
tasks.reserve(tokenized_prompts.size());
38463853
for (size_t i = 0; i < tokenized_prompts.size(); i++) {

src/llama-kv-cache.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ bool llama_kv_cache_init(
3232

3333
cache.recurrent = llama_model_is_recurrent(&model);
3434
cache.v_trans = !cache.recurrent && !cparams.flash_attn;
35-
cache.can_shift = !cache.recurrent && model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
35+
cache.can_shift = !cache.recurrent; // not supported due to MLA
3636

3737
LLAMA_LOG_INFO("%s: kv_size = %d, offload = %d, type_k = '%s', type_v = '%s', n_layer = %d, can_shift = %d\n",
3838
__func__, kv_size, offload, ggml_type_name(type_k), ggml_type_name(type_v), n_layer, cache.can_shift);

0 commit comments

Comments
 (0)