59 | 59 | # dry_seq_break_max = 128 |
60 | 60 |
61 | 61 | # global vars |
62 | | -KcppVersion = "1.90105" |
63 | | -LcppVersion = "b5226" |
64 | | -EsoboldVersion = "RMv1.8.3+7c" |
| 62 | +KcppVersion = "1.90110" |
| 63 | +LcppVersion = "b5232" |
| 64 | +EsoboldVersion = "RMv1.9.1" |
65 | 65 | CudaSpecifics = "Cu128_Ar86_SMC2_DmmvX32Y1" |
66 | | -ReleaseDate = "2025/04/30" |
| 66 | +ReleaseDate = "2025/05/02" |
67 | 67 | showdebug = True |
68 | 68 | # guimode = False |
69 | 69 | kcpp_instance = None #global running instance |
@@ -2766,10 +2766,11 @@ def transform_genparams(genparams, api_format): |
2766 | 2766 | presence_penalty = genparams.get('presence_penalty', genparams.get('frequency_penalty', 0.0)) |
2767 | 2767 | genparams["presence_penalty"] = tryparsefloat(presence_penalty,0.0) |
2768 | 2768 | # openai allows either a string or a list as a stop sequence |
2769 | | - if isinstance(genparams.get('stop',[]), list): |
2770 | | - genparams["stop_sequence"] = genparams.get('stop', []) |
2771 | | - else: |
2772 | | - genparams["stop_sequence"] = [genparams.get('stop')] |
| 2769 | + if genparams.get('stop',[]) is not None: |
| 2770 | + if isinstance(genparams.get('stop',[]), list): |
| 2771 | + genparams["stop_sequence"] = genparams.get('stop', []) |
| 2772 | + else: |
| 2773 | + genparams["stop_sequence"] = [genparams.get('stop')] |
2773 | 2774 |
2774 | 2775 | genparams["sampler_seed"] = tryparseint(genparams.get('seed', -1),-1) |
2775 | 2776 | genparams["mirostat"] = genparams.get('mirostat_mode', 0) |
@@ -2964,7 +2965,8 @@ def transform_genparams(genparams, api_format): |
2964 | 2965 | ollamasysprompt = genparams.get('system', "") |
2965 | 2966 | ollamabodyprompt = f"{detokstr}{user_message_start}{genparams.get('prompt', '')}{assistant_message_start}" |
2966 | 2967 | ollamaopts = genparams.get('options', {}) |
2967 | | - genparams["stop_sequence"] = genparams.get('stop', []) |
| 2968 | + if genparams.get('stop',[]) is not None: |
| 2969 | + genparams["stop_sequence"] = genparams.get('stop', []) |
2968 | 2970 | if "num_predict" in ollamaopts: |
2969 | 2971 | genparams["max_length"] = ollamaopts.get('num_predict', args.defaultgenamt) |
2970 | 2972 | if "num_ctx" in ollamaopts: |
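The Ollama path gets the same None-guard before copying `stop` into `stop_sequence`, alongside the existing translation of Ollama `options` fields. A rough sketch of that branch, assuming `max_context_length` as the target key for `num_ctx` (only the `num_predict` mapping is fully visible in this hunk):

```python
# Rough sketch of the Ollama "options" handling shown above.
# NOTE: the target key for num_ctx ("max_context_length") is an assumption;
# only the num_predict -> max_length mapping appears in the hunk.
DEFAULT_GEN_AMT = 512  # stand-in for args.defaultgenamt

def apply_ollama_options(genparams: dict) -> dict:
    if genparams.get('stop', []) is not None:        # same None-guard as the OpenAI path
        genparams["stop_sequence"] = genparams.get('stop', [])
    opts = genparams.get('options', {})
    if "num_predict" in opts:
        genparams["max_length"] = opts.get('num_predict', DEFAULT_GEN_AMT)
    if "num_ctx" in opts:
        genparams["max_context_length"] = opts.get('num_ctx')  # assumed key name
    return genparams
```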
@@ -4819,9 +4821,12 @@ def zenity(filetypes=None, initialdir="", initialfile="", **kwargs) -> Tuple[int |
4819 | 4821 | if sys.platform != "linux": |
4820 | 4822 | raise Exception("Zenity GUI is only usable on Linux, attempting to use TK GUI.") |
4821 | 4823 | zenity_bin = shutil.which("yad") |
| 4824 | + using_yad = True |
4822 | 4825 | if not zenity_bin: |
4823 | 4826 | zenity_bin = shutil.which("zenity") |
| 4827 | + using_yad = False |
4824 | 4828 | if not zenity_bin: |
| 4829 | + using_yad = False |
4825 | 4830 | raise Exception("Zenity not present, falling back to TK GUI.") |
4826 | 4831 |
4827 | 4832 | def zenity_clean(txt: str): |
@@ -4850,7 +4855,7 @@ def zenity_sanity_check(zenity_bin): #make sure zenity is sane |
4850 | 4855 | raise Exception("Zenity not working correctly, falling back to TK GUI.") |
4851 | 4856 |
4852 | 4857 | # Build args based on keywords |
4853 | | - args = ['/usr/bin/env', zenity_bin, '--file-selection'] |
| 4858 | + args = ['/usr/bin/env', zenity_bin, ('--file' if using_yad else '--file-selection')] |
4854 | 4859 | for k, v in kwargs.items(): |
4855 | 4860 | if v is True: |
4856 | 4861 | args.append(f'--{k.replace("_", "-").strip("-")}') |
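In the zenity helper, the new `using_yad` flag records which binary was found so the file picker passes `--file` to yad and `--file-selection` to zenity, while the kwargs loop keeps turning keyword arguments into CLI switches. A self-contained sketch of that argument construction; the branch for non-boolean values is an assumption, since only the `v is True` case appears in this hunk:

```python
# Standalone sketch of how the file-picker command line is assembled.
# Only the boolean branch of the kwargs loop is visible above; the
# string-valued branch here is an assumption for illustration.
def build_picker_args(zenity_bin: str, using_yad: bool, **kwargs) -> list[str]:
    args = ['/usr/bin/env', zenity_bin,
            ('--file' if using_yad else '--file-selection')]  # yad uses a different flag name
    for k, v in kwargs.items():
        flag = f'--{k.replace("_", "-").strip("-")}'
        if v is True:
            args.append(flag)               # bare switch, e.g. --save
        elif v not in (False, None):
            args.append(f'{flag}={v}')      # assumed form for valued options
    return args

# e.g. build_picker_args("/usr/bin/yad", True, save=True, title="Select a model")
# -> ['/usr/bin/env', '/usr/bin/yad', '--file', '--save', '--title=Select a model']
```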
@@ -7492,7 +7497,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False): |
7492 | 7497 | global maxctx |
7493 | 7498 | maxctx = args.contextsize |
7494 | 7499 |
7495 | | - args.defaultgenamt = max(128, min(args.defaultgenamt, 2048)) |
| 7500 | + args.defaultgenamt = max(128, min(args.defaultgenamt, 4096)) |
7496 | 7501 |
|
7498 | 7503 | if args.nocertify: |
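The default generation amount is now clamped to the range 128..4096 (previously 128..2048) before being capped at half of the loaded context size. A quick sketch of the resulting bounds:

```python
# Sketch of the new default-genamt clamping: the requested value is first
# clamped into [128, 4096] and then capped at half the context size.
def clamp_defaultgenamt(requested: int, maxctx: int) -> float:
    amt = max(128, min(requested, 4096))
    return min(amt, maxctx / 2)   # true division, as in the hunk above

assert clamp_defaultgenamt(512, 8192) == 512
assert clamp_defaultgenamt(8000, 16384) == 4096    # upper clamp raised from 2048
assert clamp_defaultgenamt(4096, 4096) == 2048.0   # half-context cap still applies
```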
@@ -8176,12 +8181,11 @@ def range_checker(arg: str): |
8176 | 8181 | advparser.add_argument("--exporttemplate", help="Exports the current selected arguments as a .kcppt template file", metavar=('[filename]'), type=str, default="") |
8177 | 8182 | advparser.add_argument("--nomodel", help="Allows you to launch the GUI alone, without selecting any model.", action='store_true') |
8178 | 8183 | advparser.add_argument("--moeexperts", metavar=('[num of experts]'), help="How many experts to use for MoE models (default=follow gguf)", type=int, default=-1) |
8179 | | - |
8180 | 8184 | advparser.add_argument("--normrmseps", metavar=('[norm rms eps]'), help="Override Norm RMS Epsilon value to use for the model. Useful for <2bpw quants mainly. Example of format: 1.95e-05 (default=follow gguf)", type=float, default=-1.0) |
8181 | 8185 | advparser.add_argument("--poslayeroffset", help="Removes or adds a layer to the GPU layers autoloader calculation in case of OOM or under-exploitation.", type=check_range(int,0,10), default=0) |
8182 | 8186 | advparser.add_argument("--neglayeroffset", help="Removes or adds a layer to the GPU layers autoloader calculation in case of OOM or under-exploitation.", type=check_range(int,0,10), default=0) |
8183 | 8187 |
8184 | | - advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,128,2048), default=512) |
| 8188 | + advparser.add_argument("--defaultgenamt", help="How many tokens to generate by default, if not specified. Must be smaller than context size. Usually, your frontend GUI will override this.", type=check_range(int,64,4096), default=512) |
8185 | 8189 | advparser.add_argument("--nobostoken", help="Prevents BOS token from being added at the start of any prompt. Usually NOT recommended for most models.", action='store_true') |
8186 | 8190 | advparser.add_argument("--maxrequestsize", metavar=('[size in MB]'), help="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.", type=int, default=32) |
8187 | 8191 |
|
|