
Commit 645b09e

Renamed promptlimit to genlimit; it now applies to API requests as well and can be set in the UI. Also hides the API info display when running in CLI mode.

1 parent: 3060dfb

File tree: klite.embd, koboldcpp.py

2 files changed: 27 additions (+), 3 deletions (-)

klite.embd

Lines changed: 14 additions & 0 deletions
@@ -3725,6 +3725,15 @@ Current version indicated by LITEVER below.
     "name":"OpenAI Harmony",
     "user":"<|start|>user<|message|>",
     "user_end":"<|end|>",
+    "assistant":"<|start|>assistant",
+    "assistant_end":"<|end|>",
+    "system":"<|start|>developer<|message|>",
+    "system_end":"<|end|>",
+    },
+    {
+    "name":"OpenAI Harmony Non-Thinking",
+    "user":"<|start|>user<|message|>",
+    "user_end":"<|end|>",
     "assistant":"<|start|>assistant<|channel|>final<|message|>",
     "assistant_end":"<|end|>",
     "system":"<|start|>developer<|message|>",
@@ -17063,6 +17072,8 @@ Current version indicated by LITEVER below.
             }
         }
     }
+    //special case for GPT-OSS, never use it as a stop sequence or stuff gets messed
+    seqs = seqs.filter(itm => !itm.includes("<|start|>assistant"));
     if(!localsettings.includedefaultstops)
     {
         seqs = [];
@@ -19012,6 +19023,9 @@ Current version indicated by LITEVER below.
         }
     }

+    //special case for GPT-OSS, never use it as a stop sequence or stuff gets messed
+    stripping_arr = stripping_arr.filter(itm => !itm.includes("<|start|>assistant"));
+
     //sometimes the OAI type endpoints get confused and repeat the instruct tag, so trim it
     for(let i=0;i<stripping_arr.length;++i)
     {
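
Both additions apply the same guard: any candidate stop sequence containing the Harmony assistant prefix is dropped, because every GPT-OSS reply begins with that prefix, so matching it would cut the response off immediately. A minimal Python rendering of the same filtering idea (the JS above is the actual code):

# Sketch of the guard: drop any stop sequence containing the assistant prefix.
ASSISTANT_PREFIX = "<|start|>assistant"

def filter_stop_sequences(seqs):
    return [s for s in seqs if ASSISTANT_PREFIX not in s]

print(filter_stop_sequences(["<|end|>", "<|start|>assistant<|channel|>final<|message|>"]))
# -> ['<|end|>']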

koboldcpp.py

Lines changed: 13 additions & 3 deletions
@@ -1510,6 +1510,8 @@ def generate(genparams, stream_flag=False):
     max_context_length = maxctx
     min_remain_hardlimit = max(min(max_context_length-4, 16),int(max_context_length*0.2))
     min_remain_softlimit = max(min(max_context_length-4, 16),int(max_context_length*0.4))
+    if args.genlimit > 0 and max_length > args.genlimit:
+        max_length = args.genlimit
     if max_length >= (max_context_length-min_remain_softlimit):
         print(f"\n!!! ====== !!!\nWarning: You are trying to generate text with max_length ({max_length}) near or exceeding max_context_length limit ({max_context_length}).\nMost of the context will be removed, and your outputs will not be very coherent.\nConsider launching with increased --contextsize to avoid issues.\n!!! ====== !!!")
     if max_length >= (max_context_length-min_remain_hardlimit):
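
This hunk is what makes genlimit apply to API requests: the client-supplied max_length is clamped server-side before the context-budget warnings run, and a genlimit of 0 (the new default) leaves requests untouched. A standalone sketch of that clamp, with illustrative names:

def clamp_gen_length(requested: int, genlimit: int) -> int:
    # genlimit <= 0 means "no cap", matching the launcher default of 0.
    if genlimit > 0 and requested > genlimit:
        return genlimit
    return requested

assert clamp_gen_length(4096, 512) == 512  # request above the cap: clamped
assert clamp_gen_length(256, 512) == 256   # request below the cap: untouched
assert clamp_gen_length(4096, 0) == 4096   # cap disabled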
@@ -4555,6 +4557,7 @@ def hide_tooltip(event):
     moeexperts_var = ctk.StringVar(value=str(-1))
     moecpu_var = ctk.StringVar(value=str(0))
     defaultgenamt_var = ctk.StringVar(value=str(768))
+    genlimit_var = ctk.StringVar(value=str(0))
     nobostoken_var = ctk.IntVar(value=0)
     override_kv_var = ctk.StringVar(value="")
     override_tensors_var = ctk.StringVar(value="")
@@ -5223,6 +5226,7 @@ def changerunmode(a,b,c):
     makeslider(tokens_tab, "Context Size:",contextsize_text, context_var, 0, len(contextsize_text)-1, 18, width=280, set=7,tooltip="What is the maximum context size to support. Model specific. You cannot exceed it.\nLarger contexts require more memory, and not all models support it.")
     context_var.trace_add("write", changed_gpulayers_estimate)
     makelabelentry(tokens_tab, "Default Gen Amt:", defaultgenamt_var, row=20, padx=120, singleline=True, tooltip="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.")
+    makelabelentry(tokens_tab, "Prompt Limit:", genlimit_var, row=20, padx=300, singleline=True, tooltip="If set, restricts max output tokens to this limit regardless of API request. Set to 0 to disable.",labelpadx=210)

     nativectx_entry, nativectx_label = makelabelentry(tokens_tab, "Override Native Context:", customrope_nativectx, row=23, padx=146, singleline=True, tooltip="Overrides the native trained context of the loaded model with a custom value to be used for Rope scaling.")
     customrope_scale_entry, customrope_scale_label = makelabelentry(tokens_tab, "RoPE Scale:", customrope_scale, row=23, padx=100, singleline=True, tooltip="For Linear RoPE scaling. RoPE frequency scale.")
@@ -5559,6 +5563,7 @@ def export_vars():
     args.moeexperts = int(moeexperts_var.get()) if moeexperts_var.get()!="" else -1
     args.moecpu = int(moecpu_var.get()) if moecpu_var.get()!="" else 0
     args.defaultgenamt = int(defaultgenamt_var.get()) if defaultgenamt_var.get()!="" else 768
+    args.genlimit = int(genlimit_var.get()) if genlimit_var.get()!="" else 0
     args.nobostoken = (nobostoken_var.get()==1)
     args.enableguidance = (enableguidance_var.get()==1)
     args.overridekv = None if override_kv_var.get() == "" else override_kv_var.get()
@@ -5784,6 +5789,10 @@ def import_vars(dict):
         moecpu_var.set(dict["moecpu"])
     if "defaultgenamt" in dict and dict["defaultgenamt"]:
         defaultgenamt_var.set(dict["defaultgenamt"])
+    if "genlimit" in dict and dict["genlimit"]:
+        genlimit_var.set(dict["genlimit"])
+    else:
+        genlimit_var.set(str(0))
     nobostoken_var.set(dict["nobostoken"] if ("nobostoken" in dict) else 0)
     enableguidance_var.set(dict["enableguidance"] if ("enableguidance" in dict) else 0)
     if "overridekv" in dict and dict["overridekv"]:
@@ -7362,7 +7371,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):

     print(f"======\nActive Modules: {' '.join(enabledmlist)}")
     print(f"Inactive Modules: {' '.join(disabledmlist)}")
-    print(f"Enabled APIs: {' '.join(apimlist)}")
+    if not args.cli:
+        print(f"Enabled APIs: {' '.join(apimlist)}")

     global sslvalid
     if args.ssl:
@@ -7447,7 +7457,7 @@ def onready_subprocess():
     else:
         save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
         benchmaxctx = maxctx
-        benchlen = args.promptlimit
+        benchlen = args.genlimit if args.genlimit > 0 else 100
         benchtemp = 0.1
         benchtopk = 1
         benchreppen = 1
@@ -7593,7 +7603,7 @@ def range_checker(arg: str):
     advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
     advparser.add_argument("--prompt", metavar=('[prompt]'), help="Passing a prompt string triggers a direct inference, loading the model, outputs the response to stdout and exits. Can be used alone or with benchmark.", type=str, default="")
     advparser.add_argument("--cli", help="Does not launch KoboldCpp HTTP server. Instead, enables KoboldCpp from the command line, accepting interactive console input and displaying responses to the terminal.", action='store_true')
-    advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=100)
+    advparser.add_argument("--genlimit","--promptlimit", help="Sets the maximum number of generated tokens, it will restrict all generations to this or lower. Also usable with --prompt or --benchmark.",metavar=('[token limit]'), type=int, default=0)
     advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
     advparser.add_argument("--multiplayer", help="Hosts a shared multiplayer session that others can join.", action='store_true')
     advparser.add_argument("--websearch", help="Enable the local search engine proxy so Web Searches can be done.", action='store_true')
