Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion examples/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ arguments:
-r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times)
--control-video [PATH] path to control video frames. It must be a directory path.
The video frames inside should be stored as images in lexicographical (character) order
For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, … etc.
For example, if the control video path is `frames`, the directory contains images such as 00.png, 01.png, ... etc.
--increase-ref-index automatically increase the indices of reference images based on the order they are listed (starting with 1).
--disable-auto-resize-ref-image disable auto resize of ref images
-o, --output OUTPUT path to write result image to (default: ./output.png)
-p, --prompt [PROMPT] the prompt to render
-n, --negative-prompt PROMPT the negative prompt (default: "")
Expand Down
9 changes: 7 additions & 2 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ struct SDParams {
std::string control_image_path;
std::vector<std::string> ref_image_paths;
std::string control_video_path;
bool increase_ref_index = false;
bool auto_resize_ref_image = true;
bool increase_ref_index = false;

std::string prompt;
std::string negative_prompt;
Expand Down Expand Up @@ -175,6 +176,7 @@ void print_params(SDParams params) {
printf(" %s\n", path.c_str());
};
printf(" control_video_path: %s\n", params.control_video_path.c_str());
printf(" auto_resize_ref_image: %s\n", params.auto_resize_ref_image ? "true" : "false");
printf(" increase_ref_index: %s\n", params.increase_ref_index ? "true" : "false");
printf(" offload_params_to_cpu: %s\n", params.offload_params_to_cpu ? "true" : "false");
printf(" clip_on_cpu: %s\n", params.clip_on_cpu ? "true" : "false");
Expand Down Expand Up @@ -244,9 +246,10 @@ void print_usage(int argc, const char* argv[]) {
printf(" -i, --end-img [IMAGE] path to the end image, required by flf2v\n");
printf(" --control-image [IMAGE] path to image condition, control net\n");
printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n");
printf(" --disable-auto-resize-ref-image disable auto resize of ref images\n");
printf(" --control-video [PATH] path to control video frames, It must be a directory path.\n");
printf(" The video frames inside should be stored as images in lexicographical (character) order\n");
printf(" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, etc.\n");
printf(" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, ... etc.\n");
printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
printf(" -p, --prompt [PROMPT] the prompt to render\n");
Expand Down Expand Up @@ -579,6 +582,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
{"", "--chroma-disable-dit-mask", "", false, &params.chroma_use_dit_mask},
{"", "--chroma-enable-t5-mask", "", true, &params.chroma_use_t5_mask},
{"", "--increase-ref-index", "", true, &params.increase_ref_index},
{"", "--disable-auto-resize-ref-image", "", false, &params.auto_resize_ref_image},
};

auto on_mode_arg = [&](int argc, const char** argv, int index) {
Expand Down Expand Up @@ -1428,6 +1432,7 @@ int main(int argc, const char* argv[]) {
init_image,
ref_images.data(),
(int)ref_images.size(),
params.auto_resize_ref_image,
params.increase_ref_index,
mask_image,
params.width,
Expand Down
14 changes: 11 additions & 3 deletions stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1981,6 +1981,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
"seed: %" PRId64
"batch_count: %d\n"
"ref_images_count: %d\n"
"auto_resize_ref_image: %s\n"
"increase_ref_index: %s\n"
"control_strength: %.2f\n"
"photo maker: {style_strength = %.2f, id_images_count = %d, id_embed_path = %s}\n"
Expand All @@ -1995,6 +1996,7 @@ char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params) {
sd_img_gen_params->seed,
sd_img_gen_params->batch_count,
sd_img_gen_params->ref_images_count,
BOOL_STR(sd_img_gen_params->auto_resize_ref_image),
BOOL_STR(sd_img_gen_params->increase_ref_index),
sd_img_gen_params->control_strength,
sd_img_gen_params->pm_params.style_strength,
Expand Down Expand Up @@ -2635,14 +2637,20 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
std::vector<ggml_tensor*> ref_latents;
for (int i = 0; i < ref_images.size(); i++) {
ggml_tensor* img;
if (sd_version_is_qwen_image(sd_ctx->sd->version)) {
if (sd_img_gen_params->auto_resize_ref_image) {
LOG_DEBUG("auto resize ref images");
sd_image_f32_t ref_image = sd_image_t_to_sd_image_f32_t(*ref_images[i]);
int VAE_IMAGE_SIZE = std::min(1024 * 1024, width * height);
double vae_width = sqrt(VAE_IMAGE_SIZE * ref_image.width / ref_image.height);
double vae_height = vae_width * ref_image.height / ref_image.width;

vae_height = round(vae_height / 32) * 32;
vae_width = round(vae_width / 32) * 32;
int factor = 16;
if (sd_version_is_qwen_image(sd_ctx->sd->version)) {
factor = 32;
}

vae_height = round(vae_height / factor) * factor;
vae_width = round(vae_width / factor) * factor;

sd_image_f32_t resized_image = resize_sd_image_f32_t(ref_image, static_cast<int>(vae_width), static_cast<int>(vae_height));
free(ref_image.data);
Expand Down
1 change: 1 addition & 0 deletions stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ typedef struct {
sd_image_t init_image;
sd_image_t* ref_images;
int ref_images_count;
bool auto_resize_ref_image;
bool increase_ref_index;
sd_image_t mask_image;
int width;
Expand Down
Loading