
Commit b8b8bb8

Merge branch 'leejet:master' into patch-2
2 parents d55b6ea + 19d876e commit b8b8bb8

10 files changed (+1010, -366 lines)

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -96,6 +96,7 @@ endif()
 if(SD_SYCL)
     message("-- Use SYCL as backend stable-diffusion")
     set(GGML_SYCL ON)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")
     add_definitions(-DSD_USE_SYCL)
     # disable fast-math on host, see:
     # https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/fp-model-fp.html

Dockerfile.musa

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-ARG MUSA_VERSION=rc3.1.0
+ARG MUSA_VERSION=rc3.1.1
 
 FROM mthreads/musa:${MUSA_VERSION}-devel-ubuntu22.04 as build

README.md

Lines changed: 1 addition & 1 deletion
@@ -326,7 +326,7 @@ These projects use `stable-diffusion.cpp` as a backend for their image generation
 
 - [Jellybox](https://jellybox.com)
 - [Stable Diffusion GUI](https://github.com/fszontagh/sd.cpp.gui.wx)
-- [Stable Diffusion CLI-GUI] (https://github.com/piallai/stable-diffusion.cpp)
+- [Stable Diffusion CLI-GUI](https://github.com/piallai/stable-diffusion.cpp)
 
 ## Contributors

denoiser.hpp

Lines changed: 370 additions & 1 deletion
Large diffs are not rendered by default.

examples/cli/main.cpp

Lines changed: 17 additions & 1 deletion
@@ -39,6 +39,8 @@ const char* sample_method_str[] = {
     "ipndm",
     "ipndm_v",
     "lcm",
+    "ddim_trailing",
+    "tcd",
 };
 
 // Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
@@ -93,6 +95,7 @@ struct SDParams {
     float min_cfg     = 1.0f;
     float cfg_scale   = 7.0f;
     float guidance    = 3.5f;
+    float eta         = 0.f;
     float style_ratio = 20.f;
     int clip_skip     = -1;  // <= 0 represents unspecified
     int width         = 512;
@@ -162,6 +165,7 @@ void print_params(SDParams params) {
     printf("    cfg_scale:         %.2f\n", params.cfg_scale);
     printf("    slg_scale:         %.2f\n", params.slg_scale);
     printf("    guidance:          %.2f\n", params.guidance);
+    printf("    eta:               %.2f\n", params.eta);
     printf("    clip_skip:         %d\n", params.clip_skip);
     printf("    width:             %d\n", params.width);
     printf("    height:            %d\n", params.height);
@@ -202,13 +206,16 @@ void print_usage(int argc, const char* argv[]) {
     printf("                                     If not specified, the default is the type of the weight file\n");
     printf("  --lora-model-dir [DIR]             lora model directory\n");
     printf("  -i, --init-img [IMAGE]             path to the input image, required by img2img\n");
+    printf("  --mask [MASK]                      path to the mask image, required by img2img with mask\n");
     printf("  --control-image [IMAGE]            path to image condition, control net\n");
     printf("  -o, --output OUTPUT                path to write result image to (default: ./output.png)\n");
     printf("  -p, --prompt [PROMPT]              the prompt to render\n");
     printf("  -n, --negative-prompt PROMPT       the negative prompt (default: \"\")\n");
     printf("  --cfg-scale SCALE                  unconditional guidance scale: (default: 7.0)\n");
+    printf("  --guidance SCALE                   guidance scale for img2img (default: 3.5)\n");
     printf("  --slg-scale SCALE                  skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n");
     printf("                                     0 means disabled, a value of 2.5 is nice for sd3.5 medium\n");
+    printf("  --eta SCALE                        eta in DDIM, only for DDIM and TCD: (default: 0)\n");
     printf("  --skip-layers LAYERS               Layers to skip for SLG steps: (default: [7,8,9])\n");
     printf("  --skip-layer-start START           SLG enabling point: (default: 0.01)\n");
     printf("  --skip-layer-end END               SLG disabling point: (default: 0.2)\n");
@@ -219,7 +226,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("                                     1.0 corresponds to full destruction of information in init image\n");
     printf("  -H, --height H                     image height, in pixel space (default: 512)\n");
     printf("  -W, --width W                      image width, in pixel space (default: 512)\n");
-    printf("  --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm}\n");
+    printf("  --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
     printf("                                     sampling method (default: \"euler_a\")\n");
     printf("  --steps STEPS                      number of sample steps (default: 20)\n");
     printf("  --rng {std_default, cuda}          RNG (default: cuda)\n");
@@ -438,6 +445,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 break;
             }
             params.guidance = std::stof(argv[i]);
+        } else if (arg == "--eta") {
+            if (++i >= argc) {
+                invalid_arg = true;
+                break;
+            }
+            params.eta = std::stof(argv[i]);
         } else if (arg == "--strength") {
             if (++i >= argc) {
                 invalid_arg = true;
@@ -717,6 +730,7 @@ std::string get_image_params(SDParams params, int64_t seed) {
         parameter_string += "Skip layer end: " + std::to_string(params.skip_layer_end) + ", ";
     }
     parameter_string += "Guidance: " + std::to_string(params.guidance) + ", ";
+    parameter_string += "Eta: " + std::to_string(params.eta) + ", ";
     parameter_string += "Seed: " + std::to_string(seed) + ", ";
     parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", ";
     parameter_string += "Model: " + sd_basename(params.model_path) + ", ";
@@ -937,6 +951,7 @@ int main(int argc, const char* argv[]) {
         params.clip_skip,
         params.cfg_scale,
         params.guidance,
+        params.eta,
         params.width,
         params.height,
         params.sample_method,
@@ -1004,6 +1019,7 @@ int main(int argc, const char* argv[]) {
         params.clip_skip,
         params.cfg_scale,
         params.guidance,
+        params.eta,
         params.width,
         params.height,
         params.sample_method,
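
The samplers themselves live in `denoiser.hpp`, whose diff is collapsed above. As a reader's note, in the standard DDIM formulation (Song et al., 2020), which the `--eta` help text refers to and which the implementation is assumed here to follow, eta scales the per-step noise injection, with eta = 0 recovering the deterministic update:

$$x_{t-1} = \sqrt{\bar\alpha_{t-1}}\,\hat{x}_0 + \sqrt{1-\bar\alpha_{t-1}-\sigma_t^2}\;\epsilon_\theta(x_t, t) + \sigma_t z, \qquad \sigma_t(\eta) = \eta\,\sqrt{\frac{1-\bar\alpha_{t-1}}{1-\bar\alpha_t}}\,\sqrt{1-\frac{\bar\alpha_t}{\bar\alpha_{t-1}}},$$

where $z \sim \mathcal{N}(0, I)$ and $\hat{x}_0$ is the model's denoised estimate. A typical invocation would be `--sampling-method ddim_trailing --eta 0.2`, with eta usually in [0, 1].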

ggml_extend.hpp

Lines changed: 70 additions & 3 deletions
@@ -52,6 +52,71 @@
 #define __STATIC_INLINE__ static inline
 #endif
 
+// n-mode tensor-matrix product
+// example: 2-mode product
+// A: [ne03, k, ne01, ne00]
+// B: k rows, m columns => [k, m]
+// result is [ne03, m, ne01, ne00]
+__STATIC_INLINE__ struct ggml_tensor* ggml_mul_n_mode(struct ggml_context* ctx, struct ggml_tensor* a, struct ggml_tensor* b, int mode = 0) {
+    // reshape A: swap the 0th and the mode-th axis
+    a = ggml_cont(ctx, ggml_permute(ctx, a, mode, mode != 1 ? 1 : 0, mode != 2 ? 2 : 0, mode != 3 ? 3 : 0));
+    int ne1 = a->ne[1];
+    int ne2 = a->ne[2];
+    int ne3 = a->ne[3];
+    // make 2D
+    a = ggml_cont(ctx, ggml_reshape_2d(ctx, a, a->ne[0], (ne3 * ne2 * ne1)));
+
+    struct ggml_tensor* result = ggml_cont(ctx, ggml_transpose(ctx, ggml_mul_mat(ctx, a, b)));
+
+    // reshape output (same shape as a after permutation, except the first dim)
+    result = ggml_reshape_4d(ctx, result, result->ne[0], ne1, ne2, ne3);
+    // swap back the 0th and the mode-th axis
+    result = ggml_permute(ctx, result, mode, mode != 1 ? 1 : 0, mode != 2 ? 2 : 0, mode != 3 ? 3 : 0);
+    return result;
+}
+
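Background for `ggml_mul_n_mode`, as a reader's note (standard multilinear algebra, e.g. Kolda and Bader 2009, stated in this helper's row/column convention rather than taken from the diff): the mode-n product contracts the size-k mode-n axis of A with the k rows of B, replacing that axis with one of size m,

$$(A \times_n B)_{\ldots,\, j,\, \ldots} \;=\; \sum_{i_n=1}^{k} A_{\ldots,\, i_n,\, \ldots}\; B_{i_n,\, j},$$

which matches the shape comment above: for mode 2, [ne03, k, ne01, ne00] becomes [ne03, m, ne01, ne00].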
+__STATIC_INLINE__ struct ggml_tensor* ggml_merge_lora(ggml_context* ctx, struct ggml_tensor* lora_down, struct ggml_tensor* lora_up, struct ggml_tensor* lora_mid = NULL) {
+    struct ggml_tensor* updown;
+    // flatten the lora tensors so they can be multiplied
+    int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
+    lora_up              = ggml_reshape_2d(ctx, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
+    auto lora_down_n_dims = ggml_n_dims(lora_down);
+    // round n_dims up to a multiple of 2 (otherwise rank 1 does not work)
+    lora_down_n_dims       = (lora_down_n_dims + lora_down_n_dims % 2);
+    int64_t lora_down_rows = lora_down->ne[lora_down_n_dims - 1];
+    lora_down              = ggml_reshape_2d(ctx, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows);
+
+    // ggml_mul_mat requires tensor b to be transposed
+    lora_down = ggml_cont(ctx, ggml_transpose(ctx, lora_down));
+    if (lora_mid == NULL) {
+        updown = ggml_mul_mat(ctx, lora_up, lora_down);
+        updown = ggml_cont(ctx, ggml_transpose(ctx, updown));
+    } else {
+        // undoing the Tucker decomposition used for conv layers:
+        // lora_mid  has shape (3, 3, Rank, Rank)
+        // lora_down has shape (Rank, In, 1, 1)
+        // lora_up   has shape (Rank, Out, 1, 1)
+        // the conv layer shape is (3, 3, Out, In)
+        updown = ggml_mul_n_mode(ctx, ggml_mul_n_mode(ctx, lora_mid, lora_down, 3), lora_up, 2);
+        updown = ggml_cont(ctx, updown);
+    }
+    return updown;
+}
+
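What `ggml_merge_lora` computes, as a reader's note (the standard LoRA reconstruction; any alpha/rank scaling is assumed to be applied by the caller, which this hunk does not show): for a linear layer the weight delta is a plain matrix product, and the `lora_mid` branch re-expands a Tucker-decomposed convolution kernel,

$$\Delta W = U D, \qquad \Delta W_{o,i,y,x} = \sum_{r=1}^{R}\sum_{s=1}^{R} U_{r,o}\, D_{s,i}\, M_{r,s,y,x},$$

with U = `lora_up`, D = `lora_down`, M = `lora_mid`, up to ggml's axis ordering; the second form is exactly the pair of nested `ggml_mul_n_mode` calls over modes 3 and 2.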
+// Kronecker product
+// [ne03, ne02, ne01, ne00] x [ne13, ne12, ne11, ne10] => [ne03*ne13, ne02*ne12, ne01*ne11, ne00*ne10]
+__STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct ggml_tensor* a, struct ggml_tensor* b) {
+    return ggml_mul(ctx,
+                    ggml_upscale_ext(ctx,
+                                     a,
+                                     a->ne[0] * b->ne[0],
+                                     a->ne[1] * b->ne[1],
+                                     a->ne[2] * b->ne[2],
+                                     a->ne[3] * b->ne[3]),
+                    b);
+}
+
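A quick sanity check of `ggml_kronecker` on 2-by-2 matrices (the ordinary Kronecker product; ggml's ne[] axis order is reversed, but the blockwise structure is the same):

$$\begin{pmatrix}1 & 2\\3 & 4\end{pmatrix} \otimes \begin{pmatrix}0 & 1\\1 & 0\end{pmatrix} = \begin{pmatrix}0 & 1 & 0 & 2\\1 & 0 & 2 & 0\\0 & 3 & 0 & 4\\3 & 0 & 4 & 0\end{pmatrix}.$$

The nearest-neighbour `ggml_upscale_ext` repeats each element of `a` into a block of `b`'s shape, and `ggml_mul` then broadcasts `b` across those blocks, which reproduces this blockwise definition.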
 __STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void* user_data) {
     (void)level;
     (void)user_data;
 
@@ -318,8 +383,10 @@ __STATIC_INLINE__ void sd_apply_mask(struct ggml_tensor* image_data,
     for (int ix = 0; ix < width; ix++) {
         for (int iy = 0; iy < height; iy++) {
             float m = ggml_tensor_get_f32(mask, ix, iy);
+            m = round(m);  // inpaint models need binary masks
+            ggml_tensor_set_f32(mask, m, ix, iy);
             for (int k = 0; k < channels; k++) {
-                float value = ((float)(m < 254.5 / 255)) * (ggml_tensor_get_f32(image_data, ix, iy, k) - .5) + .5;
+                float value = (1 - m) * (ggml_tensor_get_f32(image_data, ix, iy, k) - .5) + .5;
                 ggml_tensor_set_f32(output, value, ix, iy, k);
             }
         }
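Since `m` is rounded to 0 or 1 before use, the rewritten expression reduces to two cases,

$$(1 - m)(p - 0.5) + 0.5 = \begin{cases} p & m = 0 \text{ (pixel kept)} \\ 0.5 & m = 1 \text{ (masked, neutral gray)} \end{cases}$$

replacing the old threshold test `m < 254.5/255` with an explicit binary mask.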
@@ -987,8 +1054,8 @@ __STATIC_INLINE__ size_t ggml_tensor_num(ggml_context* ctx) {
 }
 
 /* SDXL with LoRA requires more space */
-#define MAX_PARAMS_TENSOR_NUM 15360
-#define MAX_GRAPH_SIZE 15360
+#define MAX_PARAMS_TENSOR_NUM 32768
+#define MAX_GRAPH_SIZE 32768
 
 struct GGMLRunner {
 protected:
