Skip to content

Commit ac73c74

Browse files
committed
Merge branch 'master' into z-image
2 parents 7e2f3e8 + bc80225 commit ac73c74

File tree

8 files changed

+69
-29
lines changed

8 files changed

+69
-29
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ API and command-line options may change frequently.***
1515

1616
## 🔥Important News
1717

18+
* **2025/11/30** 🚀 stable-diffusion.cpp now supports **FLUX.2-dev**
19+
👉 Details: [PR #1016](https://github.com/leejet/stable-diffusion.cpp/pull/1016)
20+
1821
* **2025/10/13** 🚀 stable-diffusion.cpp now supports **Qwen-Image-Edit / Qwen-Image-Edit 2509**
1922
👉 Details: [PR #877](https://github.com/leejet/stable-diffusion.cpp/pull/877)
2023

conditioner.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,17 @@ struct LLMEmbedder : public Conditioner {
18141814
prompt_attn_range.second = static_cast<int>(prompt.size());
18151815

18161816
prompt += "<|im_end|>\n<|im_start|>assistant\n";
1817+
} else if (sd_version_is_flux2(version)) {
1818+
prompt_template_encode_start_idx = 0;
1819+
out_layers = {10, 20, 30};
1820+
1821+
prompt = "[SYSTEM_PROMPT]You are an AI that reasons about image descriptions. You give structured responses focusing on object relationships, object\nattribution and actions without speculation.[/SYSTEM_PROMPT][INST]";
1822+
1823+
prompt_attn_range.first = prompt.size();
1824+
prompt += conditioner_params.text;
1825+
prompt_attn_range.second = prompt.size();
1826+
1827+
prompt += "[/INST]";
18171828
} else {
18181829
prompt_template_encode_start_idx = 34;
18191830

examples/cli/main.cpp

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ struct SDParams {
151151
preview_t preview_method = PREVIEW_NONE;
152152
int preview_interval = 1;
153153
std::string preview_path = "preview.png";
154+
float preview_fps = 16;
154155
bool taesd_preview = false;
155156
bool preview_noisy = false;
156157

@@ -1638,18 +1639,16 @@ bool load_images_from_dir(const std::string dir,
16381639
return true;
16391640
}
16401641

1641-
std::string preview_path;
1642-
float preview_fps;
1643-
1644-
void step_callback(int step, int frame_count, sd_image_t* image, bool is_noisy) {
1642+
void step_callback(int step, int frame_count, sd_image_t* image, bool is_noisy, void* data) {
16451643
(void)step;
16461644
(void)is_noisy;
1645+
SDParams* params = (SDParams*)data;
16471646
// is_noisy is set to true if the preview corresponds to noisy latents, false if it's denoised latents
16481647
// unused in this app, it will either be always noisy or always denoised here
16491648
if (frame_count == 1) {
1650-
stbi_write_png(preview_path.c_str(), image->width, image->height, image->channel, image->data, 0);
1649+
stbi_write_png(params->preview_path.c_str(), image->width, image->height, image->channel, image->data, 0);
16511650
} else {
1652-
create_mjpg_avi_from_sd_images(preview_path.c_str(), image, frame_count, preview_fps);
1651+
create_mjpg_avi_from_sd_images(params->preview_path.c_str(), image, frame_count, params->preview_fps);
16531652
}
16541653
}
16551654

@@ -1662,7 +1661,6 @@ int main(int argc, const char* argv[]) {
16621661
// ZImage::ZImageRunner::load_from_file_and_test(argv[1]);
16631662
// return 1;
16641663
parse_args(argc, argv, params);
1665-
preview_path = params.preview_path;
16661664
if (params.video_frames > 4) {
16671665
size_t last_dot_pos = params.preview_path.find_last_of(".");
16681666
std::string base_path = params.preview_path;
@@ -1673,20 +1671,20 @@ int main(int argc, const char* argv[]) {
16731671
std::transform(file_ext.begin(), file_ext.end(), file_ext.begin(), ::tolower);
16741672
}
16751673
if (file_ext == ".png") {
1676-
preview_path = base_path + ".avi";
1674+
params.preview_path = base_path + ".avi";
16771675
}
16781676
}
1679-
preview_fps = params.fps;
1677+
params.preview_fps = params.fps;
16801678
if (params.preview_method == PREVIEW_PROJ)
1681-
preview_fps /= 4.0f;
1679+
params.preview_fps /= 4.0f;
16821680

16831681
params.sample_params.guidance.slg.layers = params.skip_layers.data();
16841682
params.sample_params.guidance.slg.layer_count = params.skip_layers.size();
16851683
params.high_noise_sample_params.guidance.slg.layers = params.high_noise_skip_layers.data();
16861684
params.high_noise_sample_params.guidance.slg.layer_count = params.high_noise_skip_layers.size();
16871685

16881686
sd_set_log_callback(sd_log_cb, (void*)&params);
1689-
sd_set_preview_callback((sd_preview_cb_t)step_callback, params.preview_method, params.preview_interval, !params.preview_noisy, params.preview_noisy);
1687+
sd_set_preview_callback(step_callback, params.preview_method, params.preview_interval, !params.preview_noisy, params.preview_noisy, (void*)&params);
16901688

16911689
if (params.verbose) {
16921690
print_params(params);

ggml_extend.hpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1141,14 +1141,23 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_ones(struct ggml_context* ctx,
11411141
}
11421142

11431143
__STATIC_INLINE__ ggml_tensor* ggml_ext_cast_f32(ggml_context* ctx, ggml_tensor* a) {
1144+
#ifdef SD_USE_VULKAN
1145+
auto zero_index = ggml_get_tensor(ctx, "ggml_runner_build_in_tensor:zero_int");
1146+
auto out = ggml_reshape_1d(ctx, a, ggml_nelements(a));
1147+
out = ggml_get_rows(ctx, out, zero_index);
1148+
out = ggml_reshape(ctx, out, a);
1149+
// auto out = ggml_cast(ctx, a, GGML_TYPE_F32);
1150+
return out;
1151+
#else
11441152
auto out = ggml_reshape_2d(ctx, a, 1, ggml_nelements(a));
11451153
ggml_tensor* one = ggml_ext_ones(ctx, 1, 1, 1, 1); // [1,]
11461154
if (ggml_is_transposed(out)) {
11471155
out = ggml_mul_mat(ctx, one, out);
11481156
} else {
11491157
out = ggml_mul_mat(ctx, out, one);
11501158
}
1151-
out = ggml_reshape(ctx, out, a);
1159+
out = ggml_reshape(ctx, out, a);
1160+
#endif
11521161
return out;
11531162
}
11541163

@@ -1556,6 +1565,9 @@ struct GGMLRunner {
15561565
std::vector<float> one_vec = {1.f};
15571566
ggml_tensor* one_tensor = nullptr;
15581567

1568+
std::vector<int> zero_int_vec = {0};
1569+
ggml_tensor* zero_int_tensor = nullptr;
1570+
15591571
std::map<struct ggml_tensor*, const void*> backend_tensor_data_map;
15601572
std::map<std::string, struct ggml_tensor*> cache_tensor_map; // name -> tensor
15611573
const std::string final_result_name = "ggml_runner_final_result_tensor";
@@ -1626,10 +1638,15 @@ struct GGMLRunner {
16261638
one_tensor = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, 1);
16271639
ggml_set_name(one_tensor, "ggml_runner_build_in_tensor:one");
16281640
set_backend_tensor_data(one_tensor, one_vec.data());
1641+
1642+
zero_int_tensor = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, 1);
1643+
ggml_set_name(zero_int_tensor, "ggml_runner_build_in_tensor:zero_int");
1644+
set_backend_tensor_data(zero_int_tensor, zero_int_vec.data());
16291645
}
16301646

16311647
void prepare_build_in_tensor_after(struct ggml_cgraph* gf) {
16321648
ggml_build_forward_expand(gf, one_tensor);
1649+
ggml_build_forward_expand(gf, zero_int_tensor);
16331650
}
16341651

16351652
struct ggml_cgraph* new_graph_custom(size_t graph_size) {

stable-diffusion.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ const char* model_version_to_str[] = {
4545
"Wan 2.2 TI2V",
4646
"Qwen Image",
4747
"Flux.2",
48-
"Z Image",
48+
"Z-Image",
4949
};
5050

5151
const char* sampling_methods_str[] = {
@@ -1325,7 +1325,8 @@ class StableDiffusionGGML {
13251325
enum SDVersion version,
13261326
preview_t preview_mode,
13271327
ggml_tensor* result,
1328-
std::function<void(int, int, sd_image_t*, bool)> step_callback,
1328+
std::function<void(int, int, sd_image_t*, bool, void*)> step_callback,
1329+
void* step_callback_data,
13291330
bool is_noisy) {
13301331
const uint32_t channel = 3;
13311332
uint32_t width = latents->ne[0];
@@ -1396,7 +1397,7 @@ class StableDiffusionGGML {
13961397
for (int i = 0; i < frames; i++) {
13971398
images[i] = {width, height, channel, data + i * width * height * channel};
13981399
}
1399-
step_callback(step, frames, images, is_noisy);
1400+
step_callback(step, frames, images, is_noisy, step_callback_data);
14001401
free(data);
14011402
free(images);
14021403
} else {
@@ -1450,7 +1451,7 @@ class StableDiffusionGGML {
14501451
images[i].data = ggml_tensor_to_sd_image(result, i, ggml_n_dims(latents) == 4);
14511452
}
14521453

1453-
step_callback(step, frames, images, is_noisy);
1454+
step_callback(step, frames, images, is_noisy, step_callback_data);
14541455

14551456
ggml_ext_tensor_scale_inplace(result, 0);
14561457
for (int i = 0; i < frames; i++) {
@@ -1599,8 +1600,9 @@ class StableDiffusionGGML {
15991600
}
16001601

16011602
auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
1602-
auto sd_preview_cb = sd_get_preview_callback();
1603-
auto sd_preview_mode = sd_get_preview_mode();
1603+
auto sd_preview_cb = sd_get_preview_callback();
1604+
auto sd_preview_cb_data = sd_get_preview_callback_data();
1605+
auto sd_preview_mode = sd_get_preview_mode();
16041606
if (step == 1 || step == -1) {
16051607
pretty_progress(0, (int)steps, 0);
16061608
}
@@ -1671,7 +1673,7 @@ class StableDiffusionGGML {
16711673
}
16721674
if (sd_preview_cb != nullptr && sd_should_preview_noisy()) {
16731675
if (step % sd_get_preview_interval() == 0) {
1674-
preview_image(work_ctx, step, noised_input, version, sd_preview_mode, preview_tensor, sd_preview_cb, true);
1676+
preview_image(work_ctx, step, noised_input, version, sd_preview_mode, preview_tensor, sd_preview_cb, sd_preview_cb_data, true);
16751677
}
16761678
}
16771679

@@ -1819,7 +1821,7 @@ class StableDiffusionGGML {
18191821

18201822
if (sd_preview_cb != nullptr && sd_should_preview_denoised()) {
18211823
if (step % sd_get_preview_interval() == 0) {
1822-
preview_image(work_ctx, step, denoised, version, sd_preview_mode, preview_tensor, sd_preview_cb, false);
1824+
preview_image(work_ctx, step, denoised, version, sd_preview_mode, preview_tensor, sd_preview_cb, sd_preview_cb_data, false);
18231825
}
18241826
}
18251827

stable-diffusion.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,11 +283,11 @@ typedef struct sd_ctx_t sd_ctx_t;
283283

284284
typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
285285
typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
286-
typedef void (*sd_preview_cb_t)(int step, int frame_count, sd_image_t* frames, bool is_noisy);
286+
typedef void (*sd_preview_cb_t)(int step, int frame_count, sd_image_t* frames, bool is_noisy, void* data);
287287

288288
SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
289289
SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
290-
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy);
290+
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data);
291291
SD_API int32_t get_num_physical_cores();
292292
SD_API const char* sd_get_system_info();
293293

util.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ static sd_progress_cb_t sd_progress_cb = nullptr;
187187
void* sd_progress_cb_data = nullptr;
188188

189189
static sd_preview_cb_t sd_preview_cb = nullptr;
190+
static void* sd_preview_cb_data = nullptr;
190191
preview_t sd_preview_mode = PREVIEW_NONE;
191192
int sd_preview_interval = 1;
192193
bool sd_preview_denoised = true;
@@ -273,13 +274,16 @@ void pretty_progress(int step, int steps, float time) {
273274
}
274275
}
275276
progress += "|";
276-
printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s\033[K",
277-
progress.c_str(), step, steps,
278-
time > 1.0f || time == 0 ? time : (1.0f / time));
279-
fflush(stdout); // for linux
280-
if (step == steps) {
281-
printf("\n");
277+
278+
const char* lf = (step == steps ? "\n" : "");
279+
const char* unit = "s/it";
280+
float speed = time;
281+
if (speed < 1.0f && speed > 0.f) {
282+
speed = 1.0f / speed;
283+
unit = "it/s";
282284
}
285+
printf("\r%s %i/%i - %.2f%s\033[K%s", progress.c_str(), step, steps, speed, unit, lf);
286+
fflush(stdout); // for linux
283287
}
284288

285289
std::string ltrim(const std::string& s) {
@@ -335,8 +339,9 @@ void sd_set_progress_callback(sd_progress_cb_t cb, void* data) {
335339
sd_progress_cb = cb;
336340
sd_progress_cb_data = data;
337341
}
338-
void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode = PREVIEW_PROJ, int interval = 1, bool denoised = true, bool noisy = false) {
342+
void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode, int interval, bool denoised, bool noisy, void* data) {
339343
sd_preview_cb = cb;
344+
sd_preview_cb_data = data;
340345
sd_preview_mode = mode;
341346
sd_preview_interval = interval;
342347
sd_preview_denoised = denoised;
@@ -346,6 +351,9 @@ void sd_set_preview_callback(sd_preview_cb_t cb, preview_t mode = PREVIEW_PROJ,
346351
sd_preview_cb_t sd_get_preview_callback() {
347352
return sd_preview_cb;
348353
}
354+
void* sd_get_preview_callback_data() {
355+
return sd_preview_cb_data;
356+
}
349357

350358
preview_t sd_get_preview_mode() {
351359
return sd_preview_mode;

util.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ sd_progress_cb_t sd_get_progress_callback();
5858
void* sd_get_progress_callback_data();
5959

6060
sd_preview_cb_t sd_get_preview_callback();
61+
void* sd_get_preview_callback_data();
6162
preview_t sd_get_preview_mode();
6263
int sd_get_preview_interval();
6364
bool sd_should_preview_denoised();

0 commit comments

Comments
 (0)