Skip to content

Commit b742804

Browse files
committed
Try reducing pipeline parallelism in order to reduce compute buffer sizes
1 parent 798473d commit b742804

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

ggml/src/ggml-backend.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -662,7 +662,8 @@ static bool ggml_is_view_op(enum ggml_op op) {
662662
#endif
663663

664664
#ifndef GGML_SCHED_MAX_COPIES
665-
#define GGML_SCHED_MAX_COPIES 4
665+
// kcpp: reduced from 4 to 2 to try to make buffer sizes smaller on multi-GPU
666+
#define GGML_SCHED_MAX_COPIES 2
666667
#endif
667668

668669
struct ggml_backend_sched_split {

koboldcpp.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@
6666
extra_images_max = 4 # for kontext/qwen img
6767

6868
# global vars
69-
KcppVersion = "1.103"
69+
KcppVersion = "1.104"
7070
showdebug = True
7171
kcpp_instance = None #global running instance
7272
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}

0 commit comments

Comments
 (0)