
Commit 31e1920

Avoid negative bools in library.
1 parent 2e74787 commit 31e1920

7 files changed, +20 -20 lines


common/common.cpp

Lines changed: 1 addition & 1 deletion
@@ -1113,7 +1113,7 @@ struct llama_context_params common_context_params_to_llama(const common_params &
     cparams.offload_kqv = !params.no_kv_offload;
     cparams.flash_attn  = params.flash_attn;
     cparams.no_perf     = params.no_perf;
-    cparams.disable_op_offload = params.disable_op_offload;
+    cparams.op_offload  = !params.disable_op_offload;
 
     if (params.reranking) {
         cparams.embeddings = true;

ggml/include/ggml-backend.h

Lines changed: 1 addition & 1 deletion
@@ -289,7 +289,7 @@ extern "C" {
     typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
 
     // Initialize a backend scheduler, backends with low index are given priority over backends with high index
-    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel, bool disable_op_offload);
+    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel, bool op_offload);
     GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
 
     // Initialize backend buffers from a measure graph
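For scheduler users, only the sense of the last argument changes: pass true (the default behavior after this commit) to allow ops on host weights to be offloaded to a higher-priority backend, false to keep them where the weights live. Below is a minimal sketch of constructing a scheduler with the renamed flag, assuming a single CPU backend; the includes, graph size, and buffer types are illustrative assumptions, not part of this commit:

    #include "ggml-backend.h"
    #include "ggml-cpu.h"   // for ggml_backend_cpu_init(); header layout may differ by version

    // Sketch only: one CPU backend, op offload enabled via the renamed parameter.
    ggml_backend_t cpu = ggml_backend_cpu_init();
    ggml_backend_t backends[] = { cpu };
    ggml_backend_buffer_type_t bufts[] = { ggml_backend_get_default_buffer_type(cpu) };

    ggml_backend_sched_t sched = ggml_backend_sched_new(
        backends, bufts, /*n_backends=*/1, /*graph_size=*/2048,
        /*parallel=*/false, /*op_offload=*/true);   // before this commit: disable_op_offload = false

    // ... reserve/allocate and compute graphs with the scheduler ...

    ggml_backend_sched_free(sched);
    ggml_backend_free(cpu);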

ggml/src/ggml-backend.cpp

Lines changed: 4 additions & 4 deletions
@@ -674,7 +674,7 @@ struct ggml_backend_sched {
     char * context_buffer;
     size_t context_buffer_size;
 
-    bool disable_op_offload;
+    bool op_offload;
 
     int debug;
 };
@@ -768,7 +768,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
         if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
             int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor);
             // check if a backend with higher prio wants to offload the op
-            if (!sched->disable_op_offload && src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) {
+            if (sched->op_offload && src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) {
                 for (int b = 0; b < src_backend_id; b++) {
                     if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
                         SET_CAUSE(tensor, "1.off");
@@ -1455,7 +1455,7 @@ ggml_backend_sched_t ggml_backend_sched_new(
         int n_backends,
         size_t graph_size,
         bool parallel,
-        bool disable_op_offload) {
+        bool op_offload) {
     GGML_ASSERT(n_backends > 0);
     GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
     GGML_ASSERT(ggml_backend_dev_type(ggml_backend_get_device(backends[n_backends - 1])) == GGML_BACKEND_DEVICE_TYPE_CPU);
@@ -1500,7 +1500,7 @@ ggml_backend_sched_t ggml_backend_sched_new(
     }
 
     sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
-    sched->disable_op_offload = disable_op_offload;
+    sched->op_offload = op_offload;
 
     ggml_backend_sched_reset(sched);
 

include/llama.h

Lines changed: 1 addition & 1 deletion
@@ -362,7 +362,7 @@ extern "C" {
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
         bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]
         bool no_perf;     // whether to measure performance timings
-        bool disable_op_offload; // whether to disable offload host tensor operations to device globally
+        bool op_offload;  // whether to offload host tensor operations to device
     };
 
     // model quantization parameters
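For llama.h consumers the field is now stated positively and defaults to true (see the llama_context_default_params() change below), so callers that never touched disable_op_offload keep the old behavior. A minimal sketch of explicitly turning the feature off; the variable `model` is an illustrative assumption for a previously loaded model handle:

    llama_context_params cparams = llama_context_default_params();
    cparams.op_offload = false;   // before this commit: cparams.disable_op_offload = true;

    // 'model' is assumed to have been loaded earlier, e.g. via llama_model_load_from_file().
    llama_context * ctx = llama_init_from_model(model, cparams);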

src/llama-context.cpp

Lines changed: 3 additions & 3 deletions
@@ -93,7 +93,7 @@ llama_context::llama_context(
     }
 
     cparams.n_ubatch = std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch);
-    cparams.disable_op_offload = params.disable_op_offload;
+    cparams.op_offload = params.op_offload;
 
     const uint32_t n_ctx_per_seq = cparams.n_ctx / cparams.n_seq_max;
 
@@ -244,7 +244,7 @@ llama_context::llama_context(
         }
     }
 
-    sched.reset(ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), max_nodes, pipeline_parallel, cparams.disable_op_offload));
+    sched.reset(ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), max_nodes, pipeline_parallel, cparams.op_offload));
 
     if (pipeline_parallel) {
         LLAMA_LOG_INFO("%s: pipeline parallelism enabled (n_copies=%d)\n", __func__, ggml_backend_sched_get_n_copies(sched.get()));
@@ -1872,7 +1872,7 @@ llama_context_params llama_context_default_params() {
         /*.offload_kqv =*/ true,
         /*.flash_attn  =*/ false,
         /*.no_perf     =*/ true,
-        /*.disable_op_offload =*/ false,
+        /*.op_offload  =*/ true,
     };
 
     return result;

src/llama-cparams.h

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ struct llama_cparams {
     bool flash_attn;
     bool no_perf;
     bool warmup;
-    bool disable_op_offload;
+    bool op_offload;
 
     enum llama_pooling_type pooling_type;
 

tools/llama-bench/llama-bench.cpp

Lines changed: 9 additions & 9 deletions
@@ -908,15 +908,15 @@ struct cmd_params_instance {
     llama_context_params to_llama_cparams() const {
         llama_context_params cparams = llama_context_default_params();
 
-        cparams.n_ctx              = n_prompt + n_gen + n_depth;
-        cparams.n_batch            = n_batch;
-        cparams.n_ubatch           = n_ubatch;
-        cparams.type_k             = type_k;
-        cparams.type_v             = type_v;
-        cparams.offload_kqv        = !no_kv_offload;
-        cparams.flash_attn         = flash_attn;
-        cparams.embeddings         = embeddings;
-        cparams.disable_op_offload = disable_op_offload;
+        cparams.n_ctx       = n_prompt + n_gen + n_depth;
+        cparams.n_batch     = n_batch;
+        cparams.n_ubatch    = n_ubatch;
+        cparams.type_k      = type_k;
+        cparams.type_v      = type_v;
+        cparams.offload_kqv = !no_kv_offload;
+        cparams.flash_attn  = flash_attn;
+        cparams.embeddings  = embeddings;
+        cparams.op_offload  = !disable_op_offload;
 
         return cparams;
     }
