Skip to content

Commit f40a707

Browse files
authored
feat: add sdcpp-specific generation metadata to image outputs (#1462)
1 parent 970c4a3 commit f40a707

3 files changed

Lines changed: 201 additions & 7 deletions

File tree

examples/cli/main.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -433,10 +433,11 @@ bool save_results(const SDCliParams& cli_params,
433433
if (!img.data)
434434
return false;
435435

436-
std::string params = gen_params.embed_image_metadata
437-
? get_image_params(ctx_params, gen_params, gen_params.seed + idx)
438-
: "";
439-
const bool ok = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90);
436+
const int64_t metadata_seed = cli_params.mode == VID_GEN ? gen_params.seed : gen_params.seed + idx;
437+
std::string params = gen_params.embed_image_metadata
438+
? get_image_params(ctx_params, gen_params, metadata_seed, cli_params.mode)
439+
: "";
440+
const bool ok = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90);
440441
LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
441442
return ok;
442443
};

examples/common/common.cpp

Lines changed: 188 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2281,7 +2281,192 @@ std::string version_string() {
22812281
return std::string("stable-diffusion.cpp version ") + sd_version() + ", commit " + sd_commit();
22822282
}
22832283

2284-
std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed) {
2284+
// Converts a possibly-null C string into a std::string.
// A null pointer yields an empty string, so the result is always safe to
// store as a JSON string value.
static std::string safe_json_string(const char* value) {
    if (value == nullptr) {
        return std::string("");
    }
    return std::string(value);
}
2287+
2288+
// Stores sd_basename(path) under `key` in `target`, but only when `path` is
// non-empty. For an empty path `target` is left completely untouched (the key
// is not created).
static void set_json_basename_if_not_empty(json& target, const char* key, const std::string& path) {
    if (path.empty()) {
        return;
    }
    target[key] = sd_basename(path);
}
2293+
2294+
// Serializes one set of sampler settings into a JSON object.
//
// sample_params  - sampler configuration (steps, eta, guidance, ...).
// skip_layers    - layer indices written as guidance.slg.layers.
// custom_sigmas  - optional; when non-null the pointed-to vector is written as
//                  "custom_sigmas" (even if it is empty). When null the key is
//                  omitted.
//
// "method" and "scheduler" are written only when they differ from the
// SAMPLE_METHOD_COUNT / SCHEDULER_COUNT values.
static json build_sampling_metadata_json(const sd_sample_params_t& sample_params,
                                         const std::vector<int>& skip_layers,
                                         const std::vector<float>* custom_sigmas = nullptr) {
    const auto& guidance_params = sample_params.guidance;

    // Innermost object first: skip-layer guidance.
    json slg;
    slg["scale"]  = guidance_params.slg.scale;
    slg["layers"] = skip_layers;
    slg["start"]  = guidance_params.slg.layer_start;
    slg["end"]    = guidance_params.slg.layer_end;

    json guidance;
    guidance["txt_cfg"]            = guidance_params.txt_cfg;
    guidance["img_cfg"]            = guidance_params.img_cfg;
    guidance["distilled_guidance"] = guidance_params.distilled_guidance;
    guidance["slg"]                = std::move(slg);

    json result;
    result["steps"]            = sample_params.sample_steps;
    result["eta"]              = sample_params.eta;
    result["shifted_timestep"] = sample_params.shifted_timestep;
    result["flow_shift"]       = sample_params.flow_shift;
    result["guidance"]         = std::move(guidance);

    const bool has_method = sample_params.sample_method != SAMPLE_METHOD_COUNT;
    if (has_method) {
        result["method"] = safe_json_string(sd_sample_method_name(sample_params.sample_method));
    }

    const bool has_scheduler = sample_params.scheduler != SCHEDULER_COUNT;
    if (has_scheduler) {
        result["scheduler"] = safe_json_string(sd_scheduler_name(sample_params.scheduler));
    }

    if (custom_sigmas) {
        result["custom_sigmas"] = *custom_sigmas;
    }

    return result;
}
2327+
2328+
std::string build_sdcpp_image_metadata_json(const SDContextParams& ctx_params,
2329+
const SDGenerationParams& gen_params,
2330+
int64_t seed,
2331+
SDMode mode) {
2332+
json root;
2333+
root["schema"] = "sdcpp.image.params/v1";
2334+
root["mode"] = mode == VID_GEN ? "vid_gen" : "img_gen";
2335+
root["generator"] = {
2336+
{"name", "stable-diffusion.cpp"},
2337+
{"version", safe_json_string(sd_version())},
2338+
{"commit", safe_json_string(sd_commit())},
2339+
};
2340+
root["seed"] = seed;
2341+
root["width"] = gen_params.get_resolved_width();
2342+
root["height"] = gen_params.get_resolved_height();
2343+
2344+
root["prompt"] = {
2345+
{"positive", gen_params.prompt},
2346+
{"negative", gen_params.negative_prompt},
2347+
};
2348+
root["sampling"] = build_sampling_metadata_json(gen_params.sample_params,
2349+
gen_params.skip_layers,
2350+
&gen_params.custom_sigmas);
2351+
2352+
json models;
2353+
set_json_basename_if_not_empty(models, "model", ctx_params.model_path);
2354+
set_json_basename_if_not_empty(models, "clip_l", ctx_params.clip_l_path);
2355+
set_json_basename_if_not_empty(models, "clip_g", ctx_params.clip_g_path);
2356+
set_json_basename_if_not_empty(models, "clip_vision", ctx_params.clip_vision_path);
2357+
set_json_basename_if_not_empty(models, "t5xxl", ctx_params.t5xxl_path);
2358+
set_json_basename_if_not_empty(models, "llm", ctx_params.llm_path);
2359+
set_json_basename_if_not_empty(models, "llm_vision", ctx_params.llm_vision_path);
2360+
set_json_basename_if_not_empty(models, "diffusion_model", ctx_params.diffusion_model_path);
2361+
set_json_basename_if_not_empty(models, "high_noise_diffusion_model", ctx_params.high_noise_diffusion_model_path);
2362+
set_json_basename_if_not_empty(models, "vae", ctx_params.vae_path);
2363+
set_json_basename_if_not_empty(models, "taesd", ctx_params.taesd_path);
2364+
set_json_basename_if_not_empty(models, "control_net", ctx_params.control_net_path);
2365+
root["models"] = std::move(models);
2366+
2367+
root["clip_skip"] = gen_params.clip_skip;
2368+
root["strength"] = gen_params.strength;
2369+
root["control_strength"] = gen_params.control_strength;
2370+
root["auto_resize_ref_image"] = gen_params.auto_resize_ref_image;
2371+
root["increase_ref_index"] = gen_params.increase_ref_index;
2372+
if (mode == VID_GEN) {
2373+
root["video"] = {
2374+
{"frame_count", gen_params.video_frames},
2375+
{"fps", gen_params.fps},
2376+
};
2377+
root["moe_boundary"] = gen_params.moe_boundary;
2378+
root["vace_strength"] = gen_params.vace_strength;
2379+
root["high_noise_sampling"] = build_sampling_metadata_json(gen_params.high_noise_sample_params,
2380+
gen_params.high_noise_skip_layers);
2381+
}
2382+
2383+
root["rng"] = safe_json_string(sd_rng_type_name(ctx_params.rng_type));
2384+
if (ctx_params.sampler_rng_type != RNG_TYPE_COUNT) {
2385+
root["sampler_rng"] = safe_json_string(sd_rng_type_name(ctx_params.sampler_rng_type));
2386+
}
2387+
2388+
json loras = json::array();
2389+
for (const auto& entry : gen_params.lora_map) {
2390+
loras.push_back({
2391+
{"name", sd_basename(entry.first)},
2392+
{"multiplier", entry.second},
2393+
{"is_high_noise", false},
2394+
});
2395+
}
2396+
for (const auto& entry : gen_params.high_noise_lora_map) {
2397+
loras.push_back({
2398+
{"name", sd_basename(entry.first)},
2399+
{"multiplier", entry.second},
2400+
{"is_high_noise", true},
2401+
});
2402+
}
2403+
if (!loras.empty()) {
2404+
root["loras"] = std::move(loras);
2405+
}
2406+
2407+
if (gen_params.hires_enabled) {
2408+
root["hires"] = {
2409+
{"enabled", gen_params.hires_enabled},
2410+
{"upscaler", gen_params.hires_upscaler},
2411+
{"model", gen_params.hires_upscaler_model_path.empty() ? "" : sd_basename(gen_params.hires_upscaler_model_path)},
2412+
{"scale", gen_params.hires_scale},
2413+
{"target_width", gen_params.hires_width},
2414+
{"target_height", gen_params.hires_height},
2415+
{"steps", gen_params.hires_steps},
2416+
{"denoising_strength", gen_params.hires_denoising_strength},
2417+
{"upscale_tile_size", gen_params.hires_upscale_tile_size},
2418+
};
2419+
}
2420+
2421+
if (gen_params.cache_params.mode != SD_CACHE_DISABLED) {
2422+
root["cache"] = {
2423+
{"requested_mode", gen_params.cache_mode},
2424+
{"requested_option", gen_params.cache_option},
2425+
{"mode", gen_params.cache_params.mode},
2426+
{"scm_mask", gen_params.scm_mask},
2427+
{"scm_policy_dynamic", gen_params.scm_policy_dynamic},
2428+
{"reuse_threshold", gen_params.cache_params.reuse_threshold},
2429+
{"start_percent", gen_params.cache_params.start_percent},
2430+
{"end_percent", gen_params.cache_params.end_percent},
2431+
{"error_decay_rate", gen_params.cache_params.error_decay_rate},
2432+
{"use_relative_threshold", gen_params.cache_params.use_relative_threshold},
2433+
{"reset_error_on_compute", gen_params.cache_params.reset_error_on_compute},
2434+
{"Fn_compute_blocks", gen_params.cache_params.Fn_compute_blocks},
2435+
{"Bn_compute_blocks", gen_params.cache_params.Bn_compute_blocks},
2436+
{"residual_diff_threshold", gen_params.cache_params.residual_diff_threshold},
2437+
{"max_warmup_steps", gen_params.cache_params.max_warmup_steps},
2438+
{"max_cached_steps", gen_params.cache_params.max_cached_steps},
2439+
{"max_continuous_cached_steps", gen_params.cache_params.max_continuous_cached_steps},
2440+
{"taylorseer_n_derivatives", gen_params.cache_params.taylorseer_n_derivatives},
2441+
{"taylorseer_skip_interval", gen_params.cache_params.taylorseer_skip_interval},
2442+
{"spectrum_w", gen_params.cache_params.spectrum_w},
2443+
{"spectrum_m", gen_params.cache_params.spectrum_m},
2444+
{"spectrum_lam", gen_params.cache_params.spectrum_lam},
2445+
{"spectrum_window_size", gen_params.cache_params.spectrum_window_size},
2446+
{"spectrum_flex_window", gen_params.cache_params.spectrum_flex_window},
2447+
{"spectrum_warmup_steps", gen_params.cache_params.spectrum_warmup_steps},
2448+
{"spectrum_stop_percent", gen_params.cache_params.spectrum_stop_percent},
2449+
};
2450+
}
2451+
2452+
if (gen_params.vae_tiling_params.enabled) {
2453+
root["vae_tiling"] = {
2454+
{"enabled", gen_params.vae_tiling_params.enabled},
2455+
{"tile_size_x", gen_params.vae_tiling_params.tile_size_x},
2456+
{"tile_size_y", gen_params.vae_tiling_params.tile_size_y},
2457+
{"target_overlap", gen_params.vae_tiling_params.target_overlap},
2458+
{"rel_size_x", gen_params.vae_tiling_params.rel_size_x},
2459+
{"rel_size_y", gen_params.vae_tiling_params.rel_size_y},
2460+
};
2461+
}
2462+
2463+
return root.dump();
2464+
}
2465+
2466+
std::string get_image_params(const SDContextParams& ctx_params,
2467+
const SDGenerationParams& gen_params,
2468+
int64_t seed,
2469+
SDMode mode) {
22852470
std::string parameter_string;
22862471
if (gen_params.prompt_with_lora.size() != 0) {
22872472
parameter_string += gen_params.prompt_with_lora + "\n";
@@ -2294,7 +2479,7 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
22942479
parameter_string += "Steps: " + std::to_string(gen_params.sample_params.sample_steps) + ", ";
22952480
parameter_string += "CFG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", ";
22962481
if (gen_params.sample_params.guidance.slg.scale != 0 && gen_params.skip_layers.size() != 0) {
2297-
parameter_string += "SLG scale: " + std::to_string(gen_params.sample_params.guidance.txt_cfg) + ", ";
2482+
parameter_string += "SLG scale: " + std::to_string(gen_params.sample_params.guidance.slg.scale) + ", ";
22982483
parameter_string += "Skip layers: [";
22992484
for (const auto& layer : gen_params.skip_layers) {
23002485
parameter_string += std::to_string(layer) + ", ";
@@ -2347,5 +2532,6 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
23472532
parameter_string += "Denoising strength: " + std::to_string(gen_params.hires_denoising_strength) + ", ";
23482533
}
23492534
parameter_string += "Version: stable-diffusion.cpp";
2535+
parameter_string += ", SDCPP: " + build_sdcpp_image_metadata_json(ctx_params, gen_params, seed, mode);
23502536
return parameter_string;
23512537
}

examples/common/common.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,13 @@ struct SDGenerationParams {
249249
};
250250

251251
std::string version_string();
252-
std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed);
252+
std::string build_sdcpp_image_metadata_json(const SDContextParams& ctx_params,
253+
const SDGenerationParams& gen_params,
254+
int64_t seed,
255+
SDMode mode = IMG_GEN);
256+
std::string get_image_params(const SDContextParams& ctx_params,
257+
const SDGenerationParams& gen_params,
258+
int64_t seed,
259+
SDMode mode = IMG_GEN);
253260

254261
#endif // __EXAMPLES_COMMON_COMMON_H__

0 commit comments

Comments
 (0)