Commit eaf9388

Merge branch 'ggerganov:master' into master
2 parents: 92eca17 + 10433e8

4 files changed: +8 −9 lines

examples/server/server.cpp

Lines changed: 4 additions & 5 deletions
@@ -1090,22 +1090,21 @@ struct server_context {
             size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());

             const std::string str_test = slot.generated_text.substr(pos);
-            bool is_stop_full = false;
+            bool send_text = true;

             size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_FULL);
             if (stop_pos != std::string::npos) {
-                is_stop_full = true;
                 slot.generated_text.erase(
                     slot.generated_text.begin() + pos + stop_pos,
                     slot.generated_text.end());
                 pos = std::min(slot.n_sent_text, slot.generated_text.size());
-            } else {
-                is_stop_full = false;
+            } else if (slot.has_next_token) {
                 stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_PARTIAL);
+                send_text = stop_pos == std::string::npos;
             }

             // check if there is any token to predict
-            if (stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0)) {
+            if (send_text) {
                 // no send the stop word in the response
                 result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
                 slot.n_sent_text += result.text_to_send.size();
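
The change collapses the old is_stop_full flag and the compound condition into a single send_text flag: generated text is streamed unless a partial stop-string match is still pending while more tokens are expected; on a full match the stop string is trimmed and the text preceding it is still sent. Below is a minimal, self-contained sketch of that control flow with a single hard-coded stop string; find_stop_full, find_stop_partial, and should_send are hypothetical stand-ins for illustration, not the server's actual helpers.

    // Minimal sketch of the send_text decision, assuming one stop string;
    // the real server checks a per-slot list of stopping strings.
    #include <cstddef>
    #include <iostream>
    #include <string>

    // Full match: the stop string appears verbatim in the text.
    static size_t find_stop_full(const std::string & text, const std::string & stop) {
        return text.find(stop);
    }

    // Partial match: the text ends with a non-empty proper prefix of the stop
    // string, so upcoming tokens could still complete it.
    static size_t find_stop_partial(const std::string & text, const std::string & stop) {
        if (stop.size() < 2) {
            return std::string::npos;
        }
        for (size_t len = stop.size() - 1; len > 0; --len) {
            if (text.size() >= len && text.compare(text.size() - len, len, stop, 0, len) == 0) {
                return text.size() - len;
            }
        }
        return std::string::npos;
    }

    // Mirrors the refactored flow: send unless a partial stop match is pending.
    static bool should_send(std::string & generated, size_t pos, bool has_next_token,
                            const std::string & stop) {
        const std::string tail = generated.substr(pos);
        bool send_text = true;

        size_t stop_pos = find_stop_full(tail, stop);
        if (stop_pos != std::string::npos) {
            // full stop string found: drop it and everything after it, but
            // the text before it may still be sent
            generated.erase(generated.begin() + pos + stop_pos, generated.end());
        } else if (has_next_token) {
            stop_pos = find_stop_partial(tail, stop);
            send_text = stop_pos == std::string::npos;
        }
        return send_text;
    }

    int main() {
        std::string text = "Hello wor</";
        std::cout << should_send(text, 0, /*has_next_token=*/true, "</s>") << "\n"; // 0: hold back
        text = "Hello world";
        std::cout << should_send(text, 0, /*has_next_token=*/true, "</s>") << "\n"; // 1: stream it
    }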

ggml/src/ggml-alloc.c

Lines changed: 0 additions & 2 deletions
@@ -348,7 +348,6 @@ struct tensor_alloc {
 };

 struct leaf_alloc {
-    int buffer_id;
     struct tensor_alloc leaf;
 };

@@ -740,7 +739,6 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
     for (int i = 0; i < graph->n_leafs; i++) {
         struct ggml_tensor * leaf = graph->leafs[i];
         struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
-        galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
         if (leaf->view_src || leaf->data) {
             galloc->leaf_allocs[i].leaf.buffer_id = -1;
             galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
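
The deleted field was redundant: as the second hunk shows, each leaf_alloc already records a buffer id inside its embedded struct tensor_alloc (leaf.buffer_id), so the outer copy was duplicated state. A rough sketch of the resulting layout follows; only buffer_id and offset are visible in this diff, so any further fields are assumptions for illustration.

    // Sketch of the allocator bookkeeping after the change (not the exact
    // ggml-alloc.c definitions; size_max is assumed for illustration).
    #include <stddef.h>

    struct tensor_alloc {
        int    buffer_id; // backend buffer the tensor is placed in (-1 = not allocated here)
        size_t offset;    // offset within that buffer (SIZE_MAX = unallocated)
        size_t size_max;  // assumed: maximum size reserved for the tensor
    };

    struct leaf_alloc {
        // int buffer_id;           // removed: duplicated leaf.buffer_id
        struct tensor_alloc leaf;   // per-leaf placement, including its buffer id
    };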

scripts/sync-ggml.last

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-564f42082f858f9674b2a2e06e9e779d9ed2c754
+2327bda7a55ac6b72614ac5ebd5c5a5e02553b9b

src/llama.cpp

Lines changed: 3 additions & 1 deletion
@@ -16095,9 +16095,11 @@ struct llm_build_context {
                 cur = ggml_get_rows(ctx0, cur, inp_out_ids);

         cur = llm_build_norm(ctx0, cur, hparams, model.output_norm, model.output_norm_b, LLM_NORM, cb, -1);
-        cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
+        cb(cur, "result_norm", -1);

+        cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
         cb(cur, "result_output", -1);
+
         ggml_build_forward_expand(gf, cur);

         return gf;
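
In this hunk the hidden state coming out of the final norm is now tagged result_norm before the output projection is applied and tagged result_output, matching the naming used by the other graph-build functions. A sketch of how such a name is typically consumed, assuming the cb callback ultimately calls ggml_set_name with that name, is a by-name lookup in the finished graph:

    // Sketch: fetch the final-norm tensor from a built graph by its name.
    // Assumes gf is the ggml_cgraph returned by the build function and that
    // cb(cur, "result_norm", -1) names the tensor via ggml_set_name.
    #include "ggml.h"

    static struct ggml_tensor * get_result_norm(struct ggml_cgraph * gf) {
        // ggml_graph_get_tensor returns NULL if no tensor carries this name
        return ggml_graph_get_tensor(gf, "result_norm");
    }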
