You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
tensor_split_entry,tensor_split_label=makelabelentry(gpu_al_tab, "Tensor Split:", tensor_split_str_vars, 8, 160, tooltip='When using multiple GPUs this option controls how large tensors should be split across all GPUs.\nUses a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order.\nFor example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1.')
3240
3244
3245
+
3246
+
3241
3247
# load model
3242
3248
makefileentry(gpu_al_tab, "Model:", "Select GGML Model File", model_var, 40, 576, onchoosefile=on_picked_model_file, filetypes=[("GGML bin or GGUF", ("*.bin","*.gguf"))] ,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
3243
3249
@@ -3269,6 +3275,8 @@ def togglerope(a,b,c):
3269
3275
noqkvlabel.configure(text_color="#ff5555")
3270
3276
qkvslider,qkvlabel,qkvtitle=makeslider(tokens_tab, "Quantize KV Cache:", quantkv_text, quantkv_var, 0, 22, 30, set=0,tooltip="Enable quantization of KV cache (KVQ). Mode 0 (F16) is default. Modes 1-12 requires FlashAttention and disables ContextShift.\nModes 15-20 work without FA, for incompatible models. 0,13,14 can work with or without.")
3271
3277
3278
+
3279
+
3272
3280
# load model
3273
3281
# Model file picker on the tokens tab. The dialog title names both accepted
# formats (GGUF and GGML), in line with the file-type filter and the tooltip.
makefileentry(tokens_tab, "Model:", "Select GGUF or GGML Model File", model_var, 50, 576, onchoosefile=on_picked_model_file, filetypes=[("GGML bin or GGUF", ("*.bin","*.gguf"))] ,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
makelabel(extra_tab, "Export as launcher .kcppt template (Expert Only)", 4, 0,tooltiptxt="Creates a KoboldCpp launch template for others to use.\nEmbeds JSON files directly into exported file when saving.\nWhen loaded, forces the backend to be automatically determined.\nWarning! Not recommended for beginners!")
# makelabelentry(croco_tab, "Context Size:" , context_var, 2, 160,tooltip="How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")
3439
+
3440
+
makelabelentry(croco_tab, "Threads:" , threads_var, 4, 80,tooltip="How many threads to use.\nRecommended value is your CPU core count, defaults are usually OK.")
3441
+
3442
+
makelabelentry(croco_tab, "BLAS threads:" , blas_threads_var, 6, 80,tooltip="How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")
3443
+
3444
+
# makelabelentry(croco_tab, "Logical Blas Batch Size:" , blas_size_var, 8, 160,tooltip="How many tokens to process at once per batch.\nLarger values use more memory unless Physical Batch supersedes it.")
3445
+
3446
+
# makelabelentry(croco_tab, "Physical Blas Batch Size:" , blasubatchsize_var, 10, 160,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
3447
+
3448
+
makelabelentry(croco_tab, "GPU Layers:", gpulayers_var, 12, 80,tooltip="How many layers to offload onto the GPU.\nVRAM intensive, usage increases with model and context size.\nRequires some trial and error to find the best fit value.\n\nCommon values for total layers, accuracy not guaranteed.\n\nLlama/Mistral 7b/8b: 33\nSolar 10.7b/11b: 49\nLlama 13b: 41\nLlama 20b(stack): 63\nLlama/Yi 34b: 61\nMixtral 8x7b: 33\nLlama 70b: 81")
3449
+
3450
+
makelabelentry(croco_tab, "Positive Layer offset:", poslayeroffset_var, 14, 80, tooltip="Adds layers to the GPU layers autoloader calculation in case of under-exploitation of your GPU(s)..")
3451
+
3452
+
makelabelentry(croco_tab, "Negative Layer Offset:", neglayeroffset_var, 16, 80, tooltip="Removes layers to the GPU layers autoloader calculation in case of Out of Memory (OOM) error..")
3453
+
3454
+
makelabelentry(croco_tab, "Tensor Split:", tensor_split_str_vars, 18, 280, tooltip='When using multiple GPUs this option controls how large tensors should be split across all GPUs.\nUses a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order.\nFor example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1.')
3455
+
3456
+
makelabelentry(croco_tab, "RoPE Scale:", customrope_scale, 20, 80, tooltip="For Linear RoPE scaling. RoPE frequency scale.")
makelabelentry(croco_tab, "Quantize KV Cache:", quantkv_var, 24, 80, tooltip="Enable quantization of KV cache (KVQ). Mode 0 (F16) is default. Modes 1-12 requires FlashAttention and disables ContextShift.\nModes 15-20 work without FA, for incompatible models. 0,13,14 can work with or without.")
3461
+
3462
+
# Entry for optional model-metadata KV overrides, bound to kv_override_var.
# Tooltip spelling fixed ("Epsilon") and the stray "_" separator removed.
makelabelentry(croco_tab, "Opt. model metadata KV override:", kv_override_var, 26, 420, tooltip="Supersede metadata of a model, like Epsilon (e.g : llama.attention.layer_norm_rms_epsilon=float:1e5, 1.25e5, 3e6, etc).")
3463
+
3464
+
# Model file picker on the croco tab. The dialog title names both accepted
# formats (GGUF and GGML), in line with the file-type filter and the tooltip.
makefileentry(croco_tab, "Model:", "Select GGUF or GGML Model File", model_var, 28, 576, onchoosefile=on_picked_model_file, filetypes=[("GGML bin or GGUF", ("*.bin","*.gguf"))] ,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
advparser.add_argument("--ssl", help="Allows all content to be served over SSL instead. A valid UNENCRYPTED SSL cert and key .pem files must be provided", metavar=('[cert_pem]', '[key_pem]'), nargs='+')
5015
5061
advparser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
5016
5062
advparser.add_argument("--mmproj", help="Select a multimodal projector file for LLaVA.", default="")
5063
+
5064
+
# --kv_override: one or more model-metadata overrides (nargs='+').
# Help text spelling fixed ("Epsilon").
advparser.add_argument("--kv_override", help="Supersede metadata of a model, like Epsilon (e.g : llama.attention.layer_norm_rms_epsilon=float:1e5, 1.25e5, 3e6, etc)", metavar=('[kv_override]'), nargs='+')
5065
+
5017
5066
advparser.add_argument("--password", help="Enter a password required to use this instance. This key will be required for all text endpoints. Image endpoints are not secured.", default=None)
5018
5067
advparser.add_argument("--ignoremissing", help="Ignores all missing non-essential files, just skipping them instead.", action='store_true')
5019
5068
advparser.add_argument("--chatcompletionsadapter", help="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.", default="")
0 commit comments