Skip to content

Commit b04362f

Browse files
committed
Merge commit '00131d6eaf4df029e1ec84de868c2c5957503007' into concedo_experimental
# Conflicts:
# docs/ops.md
# examples/save-load-state/save-load-state.cpp
# ggml/CMakeLists.txt
# ggml/src/ggml-cann/aclnn_ops.cpp
# ggml/src/ggml-cann/aclnn_ops.h
# ggml/src/ggml-cann/ggml-cann.cpp
# ggml/src/ggml-hip/CMakeLists.txt
# ggml/src/ggml-sycl/cpy.cpp
# ggml/src/ggml-sycl/cpy.hpp
# ggml/src/ggml-sycl/ggml-sycl.cpp
# ggml/src/ggml-sycl/set_rows.cpp
# scripts/server-bench.py
# tests/CMakeLists.txt
# tests/test-backend-ops.cpp
# tests/test-thread-safety.cpp
# tools/llama-bench/llama-bench.cpp
2 parents cd0dc0a + 00131d6 commit b04362f

34 files changed

+17143
-4129
lines changed

common/chat.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1944,6 +1944,8 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
19441944
}
19451945
}
19461946
auto msg = builder.result();
1947-
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
1947+
if (!is_partial) {
1948+
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
1949+
}
19481950
return msg;
19491951
}

docs/ops/CANN.csv

Lines changed: 8133 additions & 0 deletions
Large diffs are not rendered by default.

docs/ops/OpenCL.csv

Lines changed: 8133 additions & 0 deletions
Large diffs are not rendered by default.

examples/embedding/embedding.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ int main(int argc, char ** argv) {
8181

8282
params.embedding = true;
8383

84+
// if the number of prompts that would be encoded is known in advance, it's more efficient to specify the
85+
// --parallel argument accordingly. for convenience, if not specified, we fall back to unified KV cache
86+
// in order to support any number of prompts
87+
if (params.n_parallel == 1) {
88+
LOG_INF("%s: n_parallel == 1 -> unified KV cache is enabled\n", __func__);
89+
params.kv_unified = true;
90+
}
91+
8492
// utilize the full context
8593
if (params.n_batch < params.n_ctx) {
8694
LOG_WRN("%s: setting batch size to %d\n", __func__, params.n_ctx);

0 commit comments

Comments
 (0)