Skip to content

Commit 08adfb5

Browse files
wbruna and LostRuins authored
Configurable VAE threshold limit (#1601)
* add backend support for changing the VAE tiling threshold

* trigger VAE tiling by image area instead of dimensions

  I've tested with GGML_VULKAN_MEMORY_DEBUG all resolutions with the same 768x768 area (even extremes like 64x9216), and many below that: all consistently allocate 6656 bytes per image pixel. As tiling is primarily useful to avoid excessive memory usage, it seems reasonable to enable VAE tiling based on area rather than maximum image side.

  However, as there is currently no user interface option to change it back to a lower value, it's best to maintain the default behavior for now.

* replace the notile option with a configurable threshold

  This allows selecting a lower threshold value, reducing the peak memory usage. The legacy sdnotile parameter gets automatically converted to the new parameter, if it's the only one supplied.

* simplify tiling checks, 768 default visible in launcher

---------

Co-authored-by: Concedo <[email protected]>
1 parent caea524 commit 08adfb5

File tree

3 files changed

+19
-12
lines changed

3 files changed

+19
-12
lines changed

expose.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ struct sd_load_model_inputs
162162
const int threads = 0;
163163
const int quant = 0;
164164
const bool taesd = false;
165-
const bool notile = false;
165+
const int tiled_vae_threshold = 0;
166166
const char * t5xxl_filename = nullptr;
167167
const char * clipl_filename = nullptr;
168168
const char * clipg_filename = nullptr;

koboldcpp.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
default_visionmaxres = 1024
5353
net_save_slots = 10
5454
savestate_limit = 3 #3 savestate slots
55+
default_vae_tile_threshold = 768
5556

5657
# abuse prevention
5758
stop_token_max = 256
@@ -272,7 +273,7 @@ class sd_load_model_inputs(ctypes.Structure):
272273
("threads", ctypes.c_int),
273274
("quant", ctypes.c_int),
274275
("taesd", ctypes.c_bool),
275-
("notile", ctypes.c_bool),
276+
("tiled_vae_threshold", ctypes.c_int),
276277
("t5xxl_filename", ctypes.c_char_p),
277278
("clipl_filename", ctypes.c_char_p),
278279
("clipg_filename", ctypes.c_char_p),
@@ -1549,7 +1550,7 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl
15491550
inputs.threads = thds
15501551
inputs.quant = quant
15511552
inputs.taesd = True if args.sdvaeauto else False
1552-
inputs.notile = True if args.sdnotile else False
1553+
inputs.tiled_vae_threshold = args.sdtiledvae
15531554
inputs.vae_filename = vae_filename.encode("UTF-8")
15541555
inputs.lora_filename = lora_filename.encode("UTF-8")
15551556
inputs.lora_multiplier = args.sdloramult
@@ -4303,7 +4304,7 @@ def hide_tooltip(event):
43034304
sd_clipg_var = ctk.StringVar()
43044305
sd_photomaker_var = ctk.StringVar()
43054306
sd_vaeauto_var = ctk.IntVar(value=0)
4306-
sd_notile_var = ctk.IntVar(value=0)
4307+
sd_tiled_vae_var = ctk.StringVar(value=str(default_vae_tile_threshold))
43074308
sd_clamped_var = ctk.StringVar(value="0")
43084309
sd_clamped_soft_var = ctk.StringVar(value="0")
43094310
sd_threads_var = ctk.StringVar(value=str(default_threads))
@@ -5033,7 +5034,7 @@ def toggletaesd(a,b,c):
50335034
sdvaeitem2.grid()
50345035
sdvaeitem3.grid()
50355036
makecheckbox(images_tab, "Use TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 42,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.")
5036-
makecheckbox(images_tab, "No VAE Tiling", sd_notile_var, 44,tooltiptxt="Disables VAE tiling, may not work for large images.")
5037+
makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=144,singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.")
50375038

50385039
# audio tab
50395040
audio_tab = tabcontent["Audio"]
@@ -5266,7 +5267,7 @@ def export_vars():
52665267
args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get()))
52675268
args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get()))
52685269
args.sdclampedsoft = (0 if int(sd_clamped_soft_var.get())<=0 else int(sd_clamped_soft_var.get()))
5269-
args.sdnotile = (True if sd_notile_var.get()==1 else False)
5270+
args.sdtiledvae = (default_vae_tile_threshold if sd_tiled_vae_var.get()=="" else int(sd_tiled_vae_var.get()))
52705271
if sd_vaeauto_var.get()==1:
52715272
args.sdvaeauto = True
52725273
args.sdvae = ""
@@ -5488,7 +5489,8 @@ def import_vars(dict):
54885489
sd_clipg_var.set(dict["sdclipg"] if ("sdclipg" in dict and dict["sdclipg"]) else "")
54895490
sd_photomaker_var.set(dict["sdphotomaker"] if ("sdphotomaker" in dict and dict["sdphotomaker"]) else "")
54905491
sd_vaeauto_var.set(1 if ("sdvaeauto" in dict and dict["sdvaeauto"]) else 0)
5491-
sd_notile_var.set(1 if ("sdnotile" in dict and dict["sdnotile"]) else 0)
5492+
sd_tiled_vae_var.set(str(dict["sdtiledvae"]) if ("sdtiledvae" in dict and dict["sdtiledvae"]) else str(default_vae_tile_threshold))
5493+
54925494
sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "")
54935495
sd_loramult_var.set(str(dict["sdloramult"]) if ("sdloramult" in dict and dict["sdloramult"]) else "1.0")
54945496

@@ -5856,6 +5858,8 @@ def convert_invalid_args(args):
58565858
dict["model_param"] = model_value
58575859
elif isinstance(model_value, list) and model_value: # Non-empty list
58585860
dict["model_param"] = model_value[0] # Take the first file in the list
5861+
if "sdnotile" in dict and "sdtiledvae" not in dict:
5862+
dict["sdtiledvae"] = (0 if (dict["sdnotile"]) else default_vae_tile_threshold) # convert legacy option
58595863
return args
58605864

58615865
def setuptunnel(global_memory, has_sd):
@@ -7269,8 +7273,7 @@ def range_checker(arg: str):
72697273
sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true')
72707274
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify an image generation LORA safetensors model to be applied.", default="")
72717275
sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LORA model to be applied.", type=float, default=1.0)
7272-
sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')
7273-
7276+
sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 0 disables vae tiling.", type=int, default=default_vae_tile_threshold)
72747277
whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
72757278
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="")
72767279

@@ -7296,5 +7299,6 @@ def range_checker(arg: str):
72967299
deprecatedgroup.add_argument("--sdconfig", help=argparse.SUPPRESS, nargs='+')
72977300
compatgroup.add_argument("--noblas", help=argparse.SUPPRESS, action='store_true')
72987301
compatgroup3.add_argument("--nommap", help=argparse.SUPPRESS, action='store_true')
7302+
deprecatedgroup.add_argument("--sdnotile", help=argparse.SUPPRESS, action='store_true') # legacy option, see sdtiledvae
72997303

73007304
main(launch_args=parser.parse_args(),default_args=parser.parse_args([]))

otherarch/sdcpp/sdtype_adapter.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ static uint8_t * input_mask_buffer = NULL;
119119
static uint8_t * input_photomaker_buffer = NULL;
120120

121121
static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv;
122-
static bool notiling = false;
122+
static int cfg_tiled_vae_threshold = 0;
123123
static int cfg_square_limit = 0;
124124
static int cfg_side_limit = 0;
125125
static bool sd_is_quiet = false;
@@ -137,7 +137,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
137137
std::string clipl_filename = inputs.clipl_filename;
138138
std::string clipg_filename = inputs.clipg_filename;
139139
std::string photomaker_filename = inputs.photomaker_filename;
140-
notiling = inputs.notile;
140+
cfg_tiled_vae_threshold = inputs.tiled_vae_threshold;
141+
cfg_tiled_vae_threshold = (cfg_tiled_vae_threshold > 8192 ? 8192 : cfg_tiled_vae_threshold);
142+
cfg_tiled_vae_threshold = (cfg_tiled_vae_threshold <= 0 ? 8192 : cfg_tiled_vae_threshold); //if negative dont tile
141143
cfg_side_limit = inputs.img_hard_limit;
142144
cfg_square_limit = inputs.img_soft_limit;
143145
printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename);
@@ -489,7 +491,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
489491
printf("\nKCPP SD: Requested dimensions %dx%d changed to %dx%d\n", inputs.width, inputs.height, sd_params->width, sd_params->height);
490492
}
491493

492-
bool dotile = (sd_params->width>768 || sd_params->height>768) && !notiling;
494+
// trigger tiling by image area, the memory used for the VAE buffer is 6656 bytes per image pixel, default 768x768
495+
bool dotile = (sd_params->width*sd_params->height > cfg_tiled_vae_threshold*cfg_tiled_vae_threshold);
493496
set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom
494497

495498
if (sd_params->clip_skip <= 0) {

0 commit comments

Comments
 (0)