Commit 3304b44

more strict condition
1 parent b353038 commit 3304b44

3 files changed: +30 additions, -28 deletions

tools/server/server.cpp

Lines changed: 15 additions & 15 deletions
@@ -1988,12 +1988,12 @@ struct server_context {
 
         if (params_base.ctx_shift) {
             params_base.ctx_shift = false;
-            SRV_INF("%s\n", "ctx_shift is not supported by multimodal, it will be disabled");
+            SRV_WRN("%s\n", "ctx_shift is not supported by multimodal, it will be disabled");
         }
 
         if (params_base.n_cache_reuse) {
             params_base.n_cache_reuse = 0;
-            SRV_INF("%s\n", "cache_reuse is not supported by multimodal, it will be disabled");
+            SRV_WRN("%s\n", "cache_reuse is not supported by multimodal, it will be disabled");
         }
 
         if (!params_base.speculative.model.path.empty()) {

@@ -2417,6 +2417,15 @@ struct server_context {
         queue_results.send(std::move(res));
     }
 
+    // if multimodal is enabled, send an error and return false
+    bool ensure_no_mtmd(const int id_task) {
+        if (mctx) {
+            send_error(id_task, "This feature is not supported by multimodal", ERROR_TYPE_NOT_SUPPORTED);
+            return false;
+        }
+        return true;
+    }
+
     void send_partial_response(server_slot & slot, const completion_token_output & tkn) {
         auto res = std::make_unique<server_task_result_cmpl_partial>();
 

@@ -2766,12 +2775,9 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_SAVE:
                 {
+                    if (!ensure_no_mtmd(task.id)) break;
                     int id_slot = task.slot_action.slot_id;
                     server_slot * slot = get_slot_by_id(id_slot);
-                    if (mctx) {
-                        send_error(task, "This feature is not supported by multimodal", ERROR_TYPE_NOT_SUPPORTED);
-                        break;
-                    }
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
                         break;

@@ -2807,10 +2813,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_RESTORE:
                 {
-                    if (mctx) {
-                        send_error(task, "This feature is not supported by multimodal", ERROR_TYPE_NOT_SUPPORTED);
-                        break;
-                    }
+                    if (!ensure_no_mtmd(task.id)) break;
                     int id_slot = task.slot_action.slot_id;
                     server_slot * slot = get_slot_by_id(id_slot);
                     if (slot == nullptr) {

@@ -2857,10 +2860,7 @@ struct server_context {
                 } break;
            case SERVER_TASK_TYPE_SLOT_ERASE:
                 {
-                    if (mctx) {
-                        send_error(task, "This feature is not supported by multimodal", ERROR_TYPE_NOT_SUPPORTED);
-                        break;
-                    }
+                    if (!ensure_no_mtmd(task.id)) break;
                     int id_slot = task.slot_action.slot_id;
                     server_slot * slot = get_slot_by_id(id_slot);
                     if (slot == nullptr) {

@@ -3417,7 +3417,7 @@ struct server_context {
                 }
 
                 if (mctx) {
-                    // we should never reach this
+                    // we should never reach this, as speculative is automatically disabled if mmproj is loaded
                     GGML_ABORT("not supported by multimodal");
                 }
 
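Taken together, the server.cpp changes replace three copy-pasted `if (mctx) { send_error(...); break; }` blocks with a single `ensure_no_mtmd` guard, so each task handler can bail out with a one-liner while the error reporting stays in one place. A minimal, self-contained sketch of that guard-helper pattern follows; the `demo_context` struct, its `send_error` signature, and `handle_slot_save` are illustrative stand-ins for this example, not the actual llama.cpp server code.

// Minimal sketch of the guard-helper pattern introduced above.
// All types and names here are illustrative stand-ins.
#include <cstdio>

struct demo_context {
    void * mctx = nullptr; // non-null when a multimodal projector is loaded

    void send_error(int id_task, const char * msg) {
        std::fprintf(stderr, "task %d: %s\n", id_task, msg);
    }

    // if multimodal is enabled, send an error and return false
    bool ensure_no_mtmd(const int id_task) {
        if (mctx) {
            send_error(id_task, "This feature is not supported by multimodal");
            return false;
        }
        return true;
    }

    void handle_slot_save(int id_task) {
        if (!ensure_no_mtmd(id_task)) return; // one guard replaces the repeated checks
        std::printf("task %d: saving slot\n", id_task);
    }
};

int main() {
    demo_context ctx;
    ctx.handle_slot_save(1); // text-only: proceeds
    ctx.mctx = &ctx;         // pretend a multimodal context is loaded
    ctx.handle_slot_save(2); // rejected with an error
    return 0;
}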

tools/server/tests/utils.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 import wget
 
 
-DEFAULT_HTTP_TIMEOUT = 120
+DEFAULT_HTTP_TIMEOUT = 12
 
 if "LLAMA_SANITIZE" in os.environ or "GITHUB_ACTION" in os.environ:
     DEFAULT_HTTP_TIMEOUT = 30

tools/server/utils.hpp

Lines changed: 14 additions & 12 deletions
@@ -1078,7 +1078,6 @@ struct server_tokens {
             auto img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
             const int n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
             llama_pos start_pos = tokens.size();
-            printf("start_pos = %d, n_pos = %d\n", start_pos, n_pos);
             for (int i = 0; i < n_pos; ++i) {
                 tokens.emplace_back(LLAMA_TOKEN_NULL);
             }

@@ -1095,10 +1094,24 @@ struct server_tokens {
         }
     }
 
+    // for compatibility with context shift and prompt truncation
     void insert(llama_tokens & inp_tokens) {
+        GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
         tokens.insert(tokens.end(), inp_tokens.begin(), inp_tokens.end());
     }
 
+    // for compatibility with speculative decoding, ctx shift, slot save/load
+    const llama_tokens & get_text_tokens() const {
+        GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
+        return tokens;
+    }
+
+    // for compatibility with speculative decoding
+    void set_token(llama_pos pos, llama_token id) {
+        GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
+        tokens[pos] = id;
+    }
+
     size_t size() const {
         return tokens.size();
     }

@@ -1129,17 +1142,6 @@ struct server_tokens {
         tokens.resize(n);
     }
 
-    // for compatibility with speculative decoding, ctx shift, slot save/load
-    const llama_tokens & get_text_tokens() const {
-        return tokens;
-    }
-
-    // for compatibility with speculative decoding
-    void set_token(llama_pos pos, llama_token id) {
-        // TODO: may need validation
-        tokens[pos] = id;
-    }
-
     std::string detokenize(const llama_context * ctx, bool special) const {
         llama_tokens text_tokens;
         text_tokens.reserve(tokens.size());
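The relocated `get_text_tokens` and `set_token` accessors are now gated behind `GGML_ASSERT(!has_mtmd)`: once multimodal chunks are mixed in, `tokens` holds `LLAMA_TOKEN_NULL` placeholders at image positions, so treating it as a flat text-token array would be incorrect. Below is a minimal sketch of this assert-guarded accessor pattern, using simplified stand-in types (`tokens_demo`, plain `int` tokens) rather than the real `server_tokens` struct.

// Minimal sketch of the assert-guarded accessors added above.
// Types are simplified stand-ins, not the llama.cpp implementation.
#include <cassert>
#include <cstddef>
#include <vector>

using token = int;

struct tokens_demo {
    // true when multimodal (mtmd) chunks may be present; in that case some
    // positions in `tokens` are null placeholders for image embeddings
    bool has_mtmd = false;
    std::vector<token> tokens;

    // text-only view of the sequence; meaningless in multimodal mode,
    // so it is gated behind an assertion
    const std::vector<token> & get_text_tokens() const {
        assert(!has_mtmd); // only allow this if mtmd is disabled
        return tokens;
    }

    // overwrite a token in place; likewise only valid for pure text
    void set_token(std::size_t pos, token id) {
        assert(!has_mtmd); // only allow this if mtmd is disabled
        tokens[pos] = id;
    }
};

int main() {
    tokens_demo t;
    t.tokens = {1, 2, 3};
    t.set_token(1, 42);         // fine: text-only mode
    (void) t.get_text_tokens(); // fine: text-only mode
    // t.has_mtmd = true;       // if set, the calls above would assert
    return 0;
}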
