Skip to content

Commit d3510f4

Browse files
committed
Typo and notes for KV quants values in MGPU-AL
1 parent e3cb3ba commit d3510f4

File tree

1 file changed

+25
-25
lines changed

1 file changed

+25
-25
lines changed

koboldcpp.py

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,51 +1292,51 @@ def autoset_gpu_layers(ctxsize,sdquanted,blasbatchsize,flashattention,quantkv,mm
12921292

12931293
kvq = quantkv
12941294
kvbpw = 0
1295-
if kvq == 0:
1295+
if kvq == 0: # F16
12961296
kvbpw = 32
1297-
if kvq == 1:
1297+
if kvq == 1: # Q8_0
12981298
kvbpw = 17
1299-
if kvq == 2:
1299+
if kvq == 2: # Q4_0
13001300
kvbpw = 9
1301-
if kvq == 3:
1301+
if kvq == 3: # F16-Q8_0
13021302
kvbpw = 24.5
1303-
if kvq == 4:
1303+
if kvq == 4: # F16-Q6_0
13041304
kvbpw = 22.5
1305-
if kvq == 5:
1305+
if kvq == 5: # Q8_0-Q6_0
13061306
kvbpw = 15
1307-
if kvq == 6:
1307+
if kvq == 6: # Q8_0-Q5_0
13081308
kvbpw = 14
1309-
if kvq == 7:
1309+
if kvq == 7: # Q8_0-IQ4_NL
13101310
kvbpw = 13
1311-
if kvq == 8:
1311+
if kvq == 8: # Q6_0-Q6_0
13121312
kvbpw = 13
1313-
if kvq == 9:
1313+
if kvq == 9: # Q6_0-Q5_0
13141314
kvbpw = 12
1315-
if kvq == 10:
1315+
if kvq == 10: # Q6_0-IQ4_NL
13161316
kvbpw = 11
1317-
if kvq == 11:
1317+
if kvq == 11: # Q5_1-Q5_0
13181318
kvbpw = 11
1319-
if kvq == 12:
1319+
if kvq == 12: # Q5_1-IQ4_NL
13201320
kvbpw = 10.5
1321-
if kvq == 13:
1321+
if kvq == 13: # Q5_0-IQ4_NL
13221322
kvbpw = 10
1323-
if kvq == 14:
1323+
if kvq == 14: # IQ4_NL-IQ4_NL
13241324
kvbpw = 9
1325-
if kvq == 15:
1325+
if kvq == 15: # BF16
13261326
kvbpw = 32
1327-
if kvq == 16:
1327+
if kvq == 16: # Q8_0-F16
13281328
kvbpw = 24.5
1329-
if kvq == 17:
1329+
if kvq == 17: # Q6_0-F16
13301330
kvbpw = 22.5
1331-
if kvq == 18:
1331+
if kvq == 18: # Q5_1-F16
13321332
kvbpw = 22
1333-
if kvq == 19:
1333+
if kvq == 19: # Q5_0-F16
13341334
kvbpw = 21.5
1335-
if kvq == 20:
1335+
if kvq == 20: # Q4_1-F16
13361336
kvbpw = 21
1337-
if kvq == 21:
1337+
if kvq == 21: # Q4_0-F16
13381338
kvbpw = 20.5
1339-
if kvq == 22:
1339+
if kvq == 22: # IQ4_NL-F16
13401340
kvbpw = 20.5
13411341

13421342
# if modelfile_extracted_meta[5] > 1024*1024*10: #draft model tax
@@ -3880,7 +3880,7 @@ def hide_tooltip(event):
38803880
"8* - K q6_0 - V q6_0 (6.5BPW) - FA. Doesn't work on Gemma 2 FA.",
38813881
"9 - K q6_0 - V q5_0 (6BPW) - FA, best game in FA town. Doesn't work on Gemma 2 FA.",
38823882
"10* - K q6_0 - V iq4_nl (5.5BPW) - FA - faulty on some models (Gemma 2 FA. Qwen 2.5 1.5b?)",
3883-
"11 - K q5_1 - V q5_0 (5.5BPW) - FA - possibly faulty on some models (Qwen 2.5 1.5b?)",
3883+
"11 - K q5_1 - V q5_0 (5.75BPW) - FA - possibly faulty on some models (Qwen 2.5 1.5b?)",
38843884
"12* - K q5_1 - V iq4_nl (5.25BPW) - FA",
38853885
"13 - K q5_0 - V iq4_nl (5BPW) - FA - possibly faulty on some models (Qwen 2.5 1.5b?)",
38863886
"14 - K iq4_nl - V iq4_nl (4.5BPW) - FA",
@@ -6520,7 +6520,7 @@ def range_checker(arg: str):
65206520
advparser.add_argument("--ignoremissing", help="Ignores all missing non-essential files, just skipping them instead.", action='store_true')
65216521
advparser.add_argument("--chatcompletionsadapter", metavar=('[filename]'), help="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.", default="AutoGuess")
65226522
advparser.add_argument("--flashattention", help="Enables flash attention.", action='store_true')
6523-
advparser.add_argument("--quantkv", help="Sets the KV cache data quantization (KVQ) type to save VRAM in NVidia Video Cards, 0 - F16 (16BPW) - FA or not, 1 - q8_0 - (8.5BPW) - FA, 2 - q4_0 - (4.5BPW) - FA, 3 - K F16 - V q8_0 (12.25BPW) - FA, 4 - K F16 - V q6_0 (11.25BPW) - FA, 5 - K q8_0 - V q6_0 (7.5BPW) - FA, 6 - K q8_0 - V q5_0 (7BPW), slower, best FA game in town, 7 - K q8_0 - V iq4_nl (6.5BPW) - FA, 8 - K q6_0 - V q6_0 (6.5BPW) - FA, 9 - K q6_0 - V q5_0 (6BPW) - FA, 10 - K q6_0 - V iq4_nl (5.5BPW) - FA, 11 - K q5_1 - V q5_0 (5.5BPW) - FA, 12 - K q5_1 - V iq4_nl (5.25BPW) - FA, 13 - K q5_0 - V iq4_nl (5BPW) - FA, 14 - K iq4_nl - V iq4_nl (4.5BPW) - FA, 15 - BF16 (16BPW) - no FA, slower, 16 - K q8_0 - V F16 (12.25BPW) - NO FA, slower, 17 - K q6_0 - V F16 (11.25BPW) - NO FA, slower, best non-FA game in town, 18 - K q5_1 - V F16 (11BPW) - NO FA, slower, 19 - K q5_0 - V F16 (11.75BPW) - NO FA, slower, 20 - K q4_1 - V F16 (10.5BPW) - NO FA, slower, 21 - K q4-0 - V F16 (10.25BPW) - NO FA, slower, 22 - K iq4_nl - V F16 (10.25BPW) - NO FA, slower.", metavar=('[quantization level 0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22]'), type=check_range(int,0,22), default=0)
6523+
# --quantkv: integer KV cache quantization mode, range-checked to 0..22, default 0.
# The BPW figure quoted for each K/V pair in the help text is the mean of the two
# bit widths (e.g. K F16 + V q8_0 -> (16 + 8.5) / 2 = 12.25), so mode 19
# (K q5_0 + V F16) is (5.5 + 16) / 2 = 10.75.
advparser.add_argument("--quantkv", help="Sets the KV cache data quantization (KVQ) type to save VRAM in NVidia Video Cards, 0 - F16 (16BPW) - FA or not, 1 - q8_0 - (8.5BPW) - FA, 2 - q4_0 - (4.5BPW) - FA, 3 - K F16 - V q8_0 (12.25BPW) - FA, 4 - K F16 - V q6_0 (11.25BPW) - FA, 5 - K q8_0 - V q6_0 (7.5BPW) - FA, 6 - K q8_0 - V q5_0 (7BPW), slower, best FA game in town, 7 - K q8_0 - V iq4_nl (6.5BPW) - FA, 8 - K q6_0 - V q6_0 (6.5BPW) - FA, 9 - K q6_0 - V q5_0 (6BPW) - FA, 10 - K q6_0 - V iq4_nl (5.5BPW) - FA, 11 - K q5_1 - V q5_0 (5.75BPW) - FA, 12 - K q5_1 - V iq4_nl (5.25BPW) - FA, 13 - K q5_0 - V iq4_nl (5BPW) - FA, 14 - K iq4_nl - V iq4_nl (4.5BPW) - FA, 15 - BF16 (16BPW) - no FA, slower, 16 - K q8_0 - V F16 (12.25BPW) - NO FA, slower, 17 - K q6_0 - V F16 (11.25BPW) - NO FA, slower, best non-FA game in town, 18 - K q5_1 - V F16 (11BPW) - NO FA, slower, 19 - K q5_0 - V F16 (10.75BPW) - NO FA, slower, 20 - K q4_1 - V F16 (10.5BPW) - NO FA, slower, 21 - K q4_0 - V F16 (10.25BPW) - NO FA, slower, 22 - K iq4_nl - V F16 (10.25BPW) - NO FA, slower.", metavar=('[quantization level 0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22]'), type=check_range(int,0,22), default=0)
65246524
advparser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0)
65256525
advparser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently. Outdated. Not recommended.", action='store_true')
65266526
advparser.add_argument("--unpack", help="Extracts the file contents of the KoboldCpp/Croco.Cpp binary into a target directory.", metavar=('destination'), type=str, default="")

0 commit comments

Comments
 (0)