Skip to content

Commit f6d2d1c

Browse files
wbruna and LostRuins authored
configurable resolution limit (#1586)
* refactor image gen configuration screen
* make image size limit configurable
* fix resolution limits and keep dimensions closer to the original ratio
* use 0.0 for the configured default image size limit

  This prevents the current default value from being saved into the config files, in case we later decide to adopt a different value.
* export image model version when loading
* restore model-specific default image size limit
* change the image area restriction to be specified by a square side
* move image resolution limits down to the C++ level
* Revert "export image model version when loading"

  This reverts commit fa65b23.
* Linting Fixes:

  PY:
  - Inconsistent var name sd_restrict_square -> sd_restrict_square_var
  - GUI swap back to using absolute row numbers for now.
  - fstring fix
  - size_limit -> side_limit inconsistency

  C++:
  - roundup_64 standalone function
  - refactor sd_fix_resolution variable names for clarity
  - move "anti crashing" hard total megapixel limit always to be applied after soft total megapixel limit instead of conditionally only when sd_restrict_square is unset
* allow unsafe resolutions if debugmode is on

---------

Co-authored-by: Concedo <[email protected]>
1 parent f1c9db4 commit f6d2d1c

File tree

3 files changed

+146
-42
lines changed

3 files changed

+146
-42
lines changed

expose.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ struct sd_load_model_inputs
169169
const char * vae_filename = nullptr;
170170
const char * lora_filename = nullptr;
171171
const float lora_multiplier = 1.0f;
172+
const int side_limit = 0;
173+
const int square_limit = 0;
172174
const bool quiet = false;
173175
const int debugmode = 0;
174176
};

koboldcpp.py

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,8 @@ class sd_load_model_inputs(ctypes.Structure):
274274
("vae_filename", ctypes.c_char_p),
275275
("lora_filename", ctypes.c_char_p),
276276
("lora_multiplier", ctypes.c_float),
277+
("side_limit", ctypes.c_int),
278+
("square_limit", ctypes.c_int),
277279
("quiet", ctypes.c_bool),
278280
("debugmode", ctypes.c_int)]
279281

@@ -1539,6 +1541,8 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl
15391541
inputs.t5xxl_filename = t5xxl_filename.encode("UTF-8")
15401542
inputs.clipl_filename = clipl_filename.encode("UTF-8")
15411543
inputs.clipg_filename = clipg_filename.encode("UTF-8")
1544+
inputs.side_limit = args.sdclamped
1545+
inputs.square_limit = args.sdrestrictsquare
15421546
inputs = set_backend_props(inputs)
15431547
ret = handle.sd_load_model(inputs)
15441548
return ret
@@ -1618,27 +1622,11 @@ def sd_generate(genparams):
16181622
clip_skip = tryparseint(genparams.get("clip_skip", -1),-1)
16191623

16201624
#clean vars
1621-
width = width - (width%64)
1622-
height = height - (height%64)
16231625
cfg_scale = (1 if cfg_scale < 1 else (25 if cfg_scale > 25 else cfg_scale))
16241626
sample_steps = (1 if sample_steps < 1 else (forced_steplimit if sample_steps > forced_steplimit else sample_steps))
1625-
reslimit = 1024
1626-
width = (64 if width < 64 else width)
1627-
height = (64 if height < 64 else height)
16281627

16291628
if args.sdclamped:
16301629
sample_steps = (40 if sample_steps > 40 else sample_steps)
1631-
reslimit = int(args.sdclamped)
1632-
reslimit = (512 if reslimit<512 else reslimit)
1633-
print(f"\nImgGen: Clamped Mode (For Shared Use). Step counts and resolution are clamped to {reslimit}x{reslimit}.")
1634-
1635-
biggest = max(width,height)
1636-
if biggest > reslimit:
1637-
scaler = biggest / reslimit
1638-
width = int(width / scaler)
1639-
height = int(height / scaler)
1640-
width = width - (width%64)
1641-
height = height - (height%64)
16421630

16431631
inputs = sd_generation_inputs()
16441632
inputs.prompt = prompt.encode("UTF-8")
@@ -4268,6 +4256,7 @@ def hide_tooltip(event):
42684256
sd_vaeauto_var = ctk.IntVar(value=0)
42694257
sd_notile_var = ctk.IntVar(value=0)
42704258
sd_clamped_var = ctk.StringVar(value="0")
4259+
sd_restrict_square_var = ctk.StringVar(value="0")
42714260
sd_threads_var = ctk.StringVar(value=str(default_threads))
42724261
sd_quant_var = ctk.IntVar(value=0)
42734262

@@ -4965,20 +4954,22 @@ def togglehorde(a,b,c):
49654954
images_tab = tabcontent["Image Gen"]
49664955
makefileentry(images_tab, "Image Gen. Model (safetensors/gguf):", "Select Image Gen Model File", sd_model_var, 1, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")], tooltiptxt="Select a .safetensors or .gguf Image Generation model file on disk to be loaded.")
49674956
makelabelentry(images_tab, "Clamped Mode (Limit Resolution):", sd_clamped_var, 4, 50, padx=290,singleline=True,tooltip="Limit generation steps and resolution settings for shared use.\nSet to 0 to disable, otherwise value is the size limit (min 512px).")
4968-
makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 6, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.")
4957+
makelabelentry(images_tab, "Restrict Square Size:", sd_restrict_square_var, 6, 50, padx=290,singleline=True,tooltip="Square image size restriction, to protect the server against memory crashes.\nAllows width-height tradeoffs, eg. 640 allows 640x640 and 512x768\nLeave at 0 for the default value: 832 for SD1.5/SD2, 1024 otherwise.")
4958+
makelabelentry(images_tab, "Image Threads:" , sd_threads_var, 8, 50,padx=290,singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.")
49694959
sd_model_var.trace("w", gui_changed_modelfile)
4960+
makecheckbox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 10,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.")
4961+
sd_quant_var.trace("w", changed_gpulayers_estimate)
4962+
4963+
makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 20, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
4964+
makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 22, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.")
49704965

4971-
makefileentry(images_tab, "Image LoRA (safetensors/gguf):", "Select SD lora file",sd_lora_var, 10, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD LoRA model file to be loaded. Should be unquantized!")
4972-
makelabelentry(images_tab, "Image LoRA Multiplier:" , sd_loramult_var, 12, 50,padx=290,singleline=True,tooltip="What mutiplier value to apply the SD LoRA with.")
49734966

4974-
makecheckbox(images_tab, "Compress Weights (Saves Memory)", sd_quant_var, 8,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.")
4975-
sd_quant_var.trace("w", changed_gpulayers_estimate)
49764967

4977-
makefileentry(images_tab, "T5-XXL File:", "Select Optional T5-XXL model file (SD3 or flux)",sd_t5xxl_var, 14, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
4978-
makefileentry(images_tab, "Clip-L File:", "Select Optional Clip-L model file (SD3 or flux)",sd_clipl_var, 16, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
4979-
makefileentry(images_tab, "Clip-G File:", "Select Optional Clip-G model file (SD3)",sd_clipg_var, 18, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
4968+
makefileentry(images_tab, "T5-XXL File:", "Select Optional T5-XXL model file (SD3 or flux)",sd_t5xxl_var, 24, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
4969+
makefileentry(images_tab, "Clip-L File:", "Select Optional Clip-L model file (SD3 or flux)",sd_clipl_var, 26, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
4970+
makefileentry(images_tab, "Clip-G File:", "Select Optional Clip-G model file (SD3)",sd_clipg_var, 28, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")],tooltiptxt="Select a .safetensors t5xxl file to be loaded.")
49804971

4981-
sdvaeitem1,sdvaeitem2,sdvaeitem3 = makefileentry(images_tab, "Image VAE:", "Select Optional SD VAE file",sd_vae_var, 20, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD VAE file to be loaded.")
4972+
sdvaeitem1,sdvaeitem2,sdvaeitem3 = makefileentry(images_tab, "Image VAE:", "Select Optional SD VAE file",sd_vae_var, 30, width=280, singlerow=True, filetypes=[("*.safetensors *.gguf", "*.safetensors *.gguf")],tooltiptxt="Select a .safetensors or .gguf SD VAE file to be loaded.")
49824973
def toggletaesd(a,b,c):
49834974
if sd_vaeauto_var.get()==1:
49844975
sdvaeitem1.grid_remove()
@@ -4989,7 +4980,7 @@ def toggletaesd(a,b,c):
49894980
sdvaeitem1.grid()
49904981
sdvaeitem2.grid()
49914982
sdvaeitem3.grid()
4992-
makecheckbox(images_tab, "Use TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 22,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.")
4983+
makecheckbox(images_tab, "Use TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 32,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.")
49934984
makecheckbox(images_tab, "No VAE Tiling", sd_notile_var, 24,tooltiptxt="Disables VAE tiling, may not work for large images.")
49944985

49954986
# audio tab
@@ -5222,6 +5213,7 @@ def export_vars():
52225213

52235214
args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get()))
52245215
args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get()))
5216+
args.sdrestrictsquare = (0 if int(sd_restrict_square_var.get())<=0 else int(sd_restrict_square_var.get()))
52255217
args.sdnotile = (True if sd_notile_var.get()==1 else False)
52265218
if sd_vaeauto_var.get()==1:
52275219
args.sdvaeauto = True
@@ -5432,6 +5424,7 @@ def import_vars(dict):
54325424

54335425
sd_model_var.set(dict["sdmodel"] if ("sdmodel" in dict and dict["sdmodel"]) else "")
54345426
sd_clamped_var.set(int(dict["sdclamped"]) if ("sdclamped" in dict and dict["sdclamped"]) else 0)
5427+
sd_restrict_square_var.set(int(dict["sdrestrictsquare"]) if ("sdrestrictsquare" in dict and dict["sdrestrictsquare"]) else 0)
54355428
sd_threads_var.set(str(dict["sdthreads"]) if ("sdthreads" in dict and dict["sdthreads"]) else str(default_threads))
54365429
sd_quant_var.set(1 if ("sdquant" in dict and dict["sdquant"]) else 0)
54375430
sd_vae_var.set(dict["sdvae"] if ("sdvae" in dict and dict["sdvae"]) else "")
@@ -7168,6 +7161,7 @@ def range_checker(arg: str):
71687161
sdparsergroup.add_argument("--sdmodel", metavar=('[filename]'), help="Specify an image generation safetensors or gguf model to enable image generation.", default="")
71697162
sdparsergroup.add_argument("--sdthreads", metavar=('[threads]'), help="Use a different number of threads for image generation if specified. Otherwise, has the same value as --threads.", type=int, default=0)
71707163
sdparsergroup.add_argument("--sdclamped", metavar=('[maxres]'), help="If specified, limit generation steps and resolution settings for shared use. Accepts an extra optional parameter that indicates maximum resolution (eg. 768 clamps to 768x768, min 512px, disabled if 0).", nargs='?', const=512, type=int, default=0)
7164+
sdparsergroup.add_argument("--sdrestrictsquare", metavar=('[maxres]'), help="If specified, restrict square image sides to this value, in pixels, to avoid server crashes related to excessive memory usage. Similar to --sdclamped, but allows trade-offs between width and height (e.g. 640 would allow 640x640, 512x768 and 768x512 images). If 0 or unspecified, use a model-specific safe value: 832 for SD1.5/SD2, 1024 otherwise. Total resolution cannot exceed 1MP.", type=int, default=0)
71717165
sdparsergroup.add_argument("--sdt5xxl", metavar=('[filename]'), help="Specify a T5-XXL safetensors model for use in SD3 or Flux. Leave blank if prebaked or unused.", default="")
71727166
sdparsergroup.add_argument("--sdclipl", metavar=('[filename]'), help="Specify a Clip-L safetensors model for use in SD3 or Flux. Leave blank if prebaked or unused.", default="")
71737167
sdparsergroup.add_argument("--sdclipg", metavar=('[filename]'), help="Specify a Clip-G safetensors model for use in SD3. Leave blank if prebaked or unused.", default="")

otherarch/sdcpp/sdtype_adapter.cpp

Lines changed: 124 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ static uint8_t * input_mask_buffer = NULL;
119119

120120
static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv;
121121
static bool notiling = false;
122+
static int cfg_square_limit = 0;
123+
static int cfg_side_limit = 0;
122124
static bool sd_is_quiet = false;
123125
static std::string sdmodelfilename = "";
124126

@@ -133,6 +135,8 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
133135
std::string clipl_filename = inputs.clipl_filename;
134136
std::string clipg_filename = inputs.clipg_filename;
135137
notiling = inputs.notile;
138+
cfg_side_limit = inputs.side_limit;
139+
cfg_square_limit = inputs.square_limit;
136140
printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename);
137141

138142
if(lorafilename!="")
@@ -307,6 +311,99 @@ static std::string get_image_params(const SDParams& params) {
307311
return parameter_string;
308312
}
309313

314+
// Round n down to the nearest multiple of 64 (intended for n >= 0).
static inline int rounddown_64(int n) {
    return n - n % 64;
}

// Round n up to the nearest multiple of 64 (intended for n >= 0).
static inline int roundup_64(int n) {
    return ((n + 63) / 64) * 64;
}

//scale dimensions to ensure width and height stay within limits
//side_limit = sdclamped, hard size limit per side, no side can exceed this
//square limit = total NxN resolution based limit to also apply
//
// width / height are updated in place. Both results are multiples of 64 and
// at least 64. When a limit forces a downscale, the rounding direction is
// picked so that the resulting aspect ratio stays as close as possible to
// the originally requested one.
//
// Limits are sanitized here as well: side_limit is raised to at least 64,
// and square_limit is kept within [64, 8192]. Since each side is capped at
// 8192, a larger square_limit could never restrict anything, so capping it
// only protects square_limit * square_limit from overflowing an int.
static void sd_fix_resolution(int &width, int &height, int side_limit, int square_limit) {

    // sanitize the original values
    width = std::max(std::min(width, 8192), 64);
    height = std::max(std::min(height, 8192), 64);

    // sanitize the limits: anything below 64 would otherwise round down to
    // zero and produce a zero-sized image dimension
    side_limit = std::max(side_limit, 64);
    square_limit = std::max(std::min(square_limit, 8192), 64);

    bool is_landscape = (width > height);
    int long_side = is_landscape ? width : height;
    int short_side = is_landscape ? height : width;
    float original_ratio = static_cast<float>(long_side) / short_side;

    // for the initial rounding, don't bother comparing to the original
    // requested ratio, since the user can choose those values directly
    long_side = rounddown_64(long_side);
    short_side = rounddown_64(short_side);
    side_limit = rounddown_64(side_limit);

    //enforce sdclamp side limit
    if (long_side > side_limit) {
        short_side = static_cast<int>(short_side * side_limit / static_cast<float>(long_side));
        long_side = side_limit;
        if (short_side <= 64) {
            short_side = 64;
        } else {
            int down = rounddown_64(short_side);
            int up = roundup_64(short_side);
            float longf = static_cast<float>(long_side);
            // Choose better ratio match between rounding up or down
            short_side = (longf / down - original_ratio < original_ratio - longf / up) ? down : up;
        }
    }

    //enforce sd_restrict_square area limit
    int area_limit = square_limit * square_limit;
    if (long_side * short_side > area_limit) {
        float scale = std::sqrt(static_cast<float>(area_limit) / (long_side * short_side));
        int new_short = static_cast<int>(short_side * scale);
        int new_long = static_cast<int>(long_side * scale);

        if (new_short <= 64) {
            // short side is pinned at the minimum; spend the rest of the area on the long side
            short_side = 64;
            long_side = rounddown_64(area_limit / short_side);
        } else {
            int new_long_down = rounddown_64(new_long);
            int new_short_down = rounddown_64(new_short);
            int new_short_up = roundup_64(new_short);
            int new_long_up = roundup_64(new_long);
            long_side = new_long_down;
            short_side = new_short_down;

            // we may get a ratio closer to the original if we still stay within the
            // limit when rounding up one of the dimensions, so check both cases
            float rdiff = std::fabs(static_cast<float>(new_long_down) / new_short_down - original_ratio);

            // an area exactly equal to the limit is allowed (the check above
            // only triggers when the area is strictly greater), so use <= here
            if (new_long_down * new_short_up <= area_limit) {
                float newrdiff = std::fabs(static_cast<float>(new_long_down) / new_short_up - original_ratio);
                if (newrdiff < rdiff) {
                    long_side = new_long_down;
                    short_side = new_short_up;
                    rdiff = newrdiff;
                }
            }

            if (new_long_up * new_short_down <= area_limit) {
                float newrdiff = std::fabs(static_cast<float>(new_long_up) / new_short_down - original_ratio);
                if (newrdiff < rdiff) {
                    long_side = new_long_up;
                    short_side = new_short_down;
                }
            }
        }
    }

    // map long/short back onto the caller's width/height
    if (is_landscape) {
        width = long_side;
        height = short_side;
    } else {
        width = short_side;
        height = long_side;
    }
}
406+
310407
sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
311408
{
312409
sd_generation_outputs output;
@@ -339,8 +436,6 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
339436
sd_params->clip_skip = inputs.clip_skip;
340437
sd_params->mode = (img2img_data==""?SDMode::TXT2IMG:SDMode::IMG2IMG);
341438

342-
//ensure unsupported dimensions are fixed
343-
int biggestdim = (sd_params->width>sd_params->height?sd_params->width:sd_params->height);
344439
auto loadedsdver = get_loaded_sd_version(sd_ctx);
345440
if (loadedsdver == SDVersion::VERSION_FLUX)
346441
{
@@ -351,21 +446,34 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
351446
sampler = "euler"; //euler a broken on flux
352447
}
353448
}
354-
int reslimit = (loadedsdver==SDVersion::VERSION_SD1 || loadedsdver==SDVersion::VERSION_SD2)?832:1024;
355-
if(biggestdim > reslimit)
356-
{
357-
float scaler = (float)biggestdim / (float)reslimit;
358-
int newwidth = (int)((float)sd_params->width / scaler);
359-
int newheight = (int)((float)sd_params->height / scaler);
360-
newwidth = newwidth - (newwidth%64);
361-
newheight = newheight - (newheight%64);
362-
sd_params->width = newwidth;
363-
sd_params->height = newheight;
364-
if(!sd_is_quiet && sddebugmode==1)
365-
{
366-
printf("\nDownscale to %dx%d as %d > %d\n",newwidth,newheight,biggestdim,reslimit);
367-
}
449+
450+
const int default_res_limit = 8192; // arbitrary, just to simplify the code
451+
// avoid crashes due to bugs/limitations on certain models
452+
// although it can be possible for a single side to exceed 1024, the total resolution of the image
453+
// cannot exceed (832x832) for sd1/sd2 or (1024x1024) for sdxl/sd3/flux, to prevent crashing the server
454+
const int hard_megapixel_res_limit = (loadedsdver==SDVersion::VERSION_SD1 || loadedsdver==SDVersion::VERSION_SD2)?832:1024;
455+
456+
int side_limit = default_res_limit;
457+
if (cfg_side_limit > 0) {
458+
side_limit = std::max(std::min(cfg_side_limit, default_res_limit), 64);
459+
}
460+
461+
int square_limit = default_res_limit;
462+
if (cfg_square_limit > 0) {
463+
square_limit = std::max(std::min(cfg_square_limit, default_res_limit), 64);
368464
}
465+
466+
if (cfg_square_limit > 0 && sddebugmode == 1) {
467+
square_limit = std::min(hard_megapixel_res_limit * 2, square_limit); //double the limit for debugmode if cfg_square_limit is set
468+
} else {
469+
square_limit = std::min(hard_megapixel_res_limit, square_limit);
470+
}
471+
472+
sd_fix_resolution(sd_params->width, sd_params->height, side_limit, square_limit);
473+
if (inputs.width != sd_params->width || inputs.height != sd_params->height) {
474+
printf("\nKCPP SD: Requested dimensions %dx%d changed to %dx%d\n", inputs.width, inputs.height, sd_params->width, sd_params->height);
475+
}
476+
369477
bool dotile = (sd_params->width>768 || sd_params->height>768) && !notiling;
370478
set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom
371479

0 commit comments

Comments
 (0)