Skip to content

Commit 6674f8f

Browse files
committed
Added MOE options - llama.cpp PR 15077
Adds two options: --cpu-moe keeps all MoE weights on the CPU, and --n-cpu-moe N keeps the MoE weights of the first N layers on the CPU. See ggml-org/llama.cpp#15077.
1 parent 4ad7e11 commit 6674f8f

File tree

5 files changed

+48
-3
lines changed

5 files changed

+48
-3
lines changed

.gitignore

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,14 @@ llama_cpp_launcher_configs.json
33
__pycache__
44
.gitignore
55
llama.log
6-
llama.log
76
log.txt
87
tensor_overrides/Kimi-K2-Instruct-UD-IQ1_S-00001-of-00006_Kimi-K2-Instruct-UD-IQ1_S_kv=q4_0_vv=q4_0_th=24_tb=48_ctx=131072_fa_tensor_params.txt
98
tensor_overrides/Kimi-K2-Instruct-UD-IQ1_S-00001-of-00006_Kimi-K2-Instruct-UD-IQ1_S_kv=q4_0_vv=q4_0_th=24_tb=48_ctx=131072_fa_tensor_params_balanced.txt
109
tensor_overrides/Kimi-K2-Instruct-UD-IQ1_S-00001-of-00006_Kimi-K2-Instruct-UD-IQ1_S_kv=q4_0_vv=q4_0_th=24_tb=48_ctx=131072_fa_tensor_params_original.txt
10+
debug_scan.py
11+
debug_regex.py
12+
debug_glm_file.py
13+
/tensor_overrides
14+
test_fix.py
15+
test_more_patterns.py
16+
test_regex_fix.py

config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ def generate_default_config_name(self):
8282
"no_mmap": False, # Default for --no-mmap flag
8383
"mlock": False, # Default for --mlock flag
8484
"no_kv_offload": False, # Default for --no-kv-offload flag
85+
"cpu_moe": False, # Default for --cpu-moe flag
86+
"n_cpu_moe": "", # Default for --n-cpu-moe (empty)
8587

8688
# Chat template parameters and custom parameters are deliberately excluded from default name generation
8789
}
@@ -107,6 +109,8 @@ def generate_default_config_name(self):
107109
"no_mmap": self.launcher.no_mmap.get(), # bool
108110
"mlock": self.launcher.mlock.get(), # bool
109111
"no_kv_offload": self.launcher.no_kv_offload.get(), # bool
112+
"cpu_moe": self.launcher.cpu_moe.get(), # bool
113+
"n_cpu_moe": self.launcher.n_cpu_moe.get().strip(),
110114

111115
}
112116

@@ -142,6 +146,7 @@ def generate_default_config_name(self):
142146
"no_kv_offload": "no-kv-offload",
143147

144148
"ignore_eos": "no-eos",
149+
"cpu_moe": "cpu-moe",
145150
}
146151
parts.append(flag_name_map.get(key, key.replace('_', '-'))) # Use mapped name or just key
147152
# Handle other string parameters
@@ -270,6 +275,9 @@ def current_cfg(self):
270275
# --- NEW: Add new parameters to config ---
271276
"ignore_eos": self.launcher.ignore_eos.get(),
272277
"n_predict": self.launcher.n_predict.get(),
278+
# --- MoE CPU parameters ---
279+
"cpu_moe": self.launcher.cpu_moe.get(),
280+
"n_cpu_moe": self.launcher.n_cpu_moe.get(),
273281
# --- CHANGES FOR JSON TEMPLATES / DEFAULT OPTION ---
274282
# Save the new template source variable
275283
"template_source": self.launcher.template_source.get(),
@@ -338,6 +346,9 @@ def load_configuration(self):
338346
# --- NEW: Load new parameters ---
339347
self.launcher.ignore_eos.set(cfg.get("ignore_eos", False))
340348
self.launcher.n_predict.set(cfg.get("n_predict", "-1")) # Default -1 for backward compatibility
349+
# --- MoE CPU parameters ---
350+
self.launcher.cpu_moe.set(cfg.get("cpu_moe", False))
351+
self.launcher.n_cpu_moe.set(cfg.get("n_cpu_moe", ""))
341352
# --- NEW: Load Custom Parameters ---
342353
# Default to empty list [] for backward compatibility with older configs
343354
self.launcher.custom_parameters_list = cfg.get("custom_parameters", [])

launch.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ def build_cmd(self):
194194

195195
# Performance options
196196
self.add_arg(cmd, "--prio", self.launcher.prio.get(), "0") # Omit if 0 (default)
197+
198+
# --- MoE CPU options (only for llama.cpp backend) ---
199+
if backend != "ik_llama":
200+
self.add_arg(cmd, "--cpu-moe", self.launcher.cpu_moe.get()) # Omit if False (default)
201+
self.add_arg(cmd, "--n-cpu-moe", self.launcher.n_cpu_moe.get(), "") # Omit if empty (default)
197202

198203
# --- NEW: Generation options ---
199204
self.add_arg(cmd, "--ignore-eos", self.launcher.ignore_eos.get()) # Omit if False (default)

llamacpp-server-launcher.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,10 @@ def __init__(self, root: tk.Tk):
277277
# --- New Parameters ---
278278
self.ignore_eos = tk.BooleanVar(value=False) # --ignore-eos
279279
self.n_predict = tk.StringVar(value="-1") # --n-predict
280+
281+
# --- MoE CPU Parameters ---
282+
self.cpu_moe = tk.BooleanVar(value=False) # --cpu-moe
283+
self.n_cpu_moe = tk.StringVar(value="") # --n-cpu-moe
280284

281285
# --- Chat Template Selection Variables ---
282286
# Controls which template source is used: 'default', 'predefined', or 'custom'.
@@ -417,6 +421,9 @@ def __init__(self, root: tk.Tk):
417421
self.n_predict.trace_add("write", lambda *args: self._update_default_config_name_if_needed())
418422
# Bind trace to ignore_eos to update default config name if needed
419423
self.ignore_eos.trace_add("write", lambda *args: self._update_default_config_name_if_needed())
424+
# Bind trace to MoE CPU parameters to update default config name if needed
425+
self.cpu_moe.trace_add("write", lambda *args: self._update_default_config_name_if_needed())
426+
self.n_cpu_moe.trace_add("write", lambda *args: self._update_default_config_name_if_needed())
420427
# Bind trace to other variables that affect the default config name
421428
self.cache_type_k.trace_add("write", lambda *args: self._update_default_config_name_if_needed())
422429
self.threads.trace_add("write", lambda *args: self._update_default_config_name_if_needed())
@@ -1147,6 +1154,21 @@ def _setup_advanced_tab(self, parent):
11471154
self.prio_combo.grid(column=1, row=r, sticky="w", padx=5, pady=3); r += 1
11481155
ttk.Label(inner, text="0=Normal, 1=Medium, 2=High, 3=Realtime (OS dependent)", font=("TkSmallCaptionFont"))\
11491156
.grid(column=2, row=r-1, columnspan=2, sticky="w", padx=5, pady=3); # Re-grid label
1157+
1158+
# --- MoE CPU Settings --- (same row)
1159+
ttk.Label(inner, text="MoE CPU Settings:")\
1160+
.grid(column=0, row=r, sticky="w", padx=10, pady=3)
1161+
moe_frame = ttk.Frame(inner)
1162+
moe_frame.grid(column=1, row=r, columnspan=3, sticky="w", padx=5, pady=3)
1163+
1164+
self.cpu_moe_check = ttk.Checkbutton(moe_frame, text="Keep all MoE in CPU (--cpu-moe)", variable=self.cpu_moe, state=tk.NORMAL)
1165+
self.cpu_moe_check.pack(side="left", padx=(0, 10))
1166+
1167+
ttk.Label(moe_frame, text="First N layers in CPU (--n-cpu-moe):")\
1168+
.pack(side="left", padx=(0, 5))
1169+
self.n_cpu_moe_entry = ttk.Entry(moe_frame, textvariable=self.n_cpu_moe, width=8, state=tk.NORMAL)
1170+
self.n_cpu_moe_entry.pack(side="left")
1171+
r += 1
11501172

11511173

11521174
# --- NEW: Generation Settings ---
@@ -1647,7 +1669,8 @@ def _scan_model_dirs(self):
16471669
print("DEBUG: _scan_model_dirs thread started", file=sys.stderr)
16481670
found = {} # {display_name: full_path_obj}
16491671
# Pattern to match multi-part files like model-00001-of-00005.gguf or model-F1.gguf
1650-
multipart_pattern = re.compile(r"^(.*?)(?:-\d{5}-of-\d{5}|-F\d+)\.gguf$", re.IGNORECASE)
1672+
# Note: -F[1-9] only matches single-digit F parts to avoid matching precision indicators like F16, F32
1673+
multipart_pattern = re.compile(r"^(.*?)(?:-\d{5}-of-\d{5}|-F[1-9])\.gguf$", re.IGNORECASE)
16511674
# Pattern to match the FIRST part of a multi-part file (e.g., model-00001-of-00005.gguf or model-F1.gguf)
16521675
first_part_pattern = re.compile(r"^(.*?)-(?:00001-of-\d{5}|F1)\.gguf$", re.IGNORECASE)
16531676

version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2025-07-22-0
1+
2025-08-05-0

0 commit comments

Comments
 (0)