@@ -800,11 +800,11 @@ class StableDiffusionGGML {
800800 const std::vector<float >& sigmas,
801801 int start_merge_step,
802802 SDCondition id_cond,
803- std::vector< int > skip_layers = {},
804- float slg_scale = 0 ,
805- float skip_layer_start = 0.01 ,
806- float skip_layer_end = 0.2 ,
807- ggml_tensor* noise_mask = nullptr ) {
803+ sd_slg_params_t slg_params = {NULL , 0 , 0 , 0 , 0 },
804+ sd_apg_params_t apg_params = { 1 , 0 , 0 } ,
805+ ggml_tensor* noise_mask = nullptr ) {
806+ std::vector< int > skip_layers (slg_params. skip_layers , slg_params. skip_layers + slg_params. skip_layers_count );
807+
808808 LOG_DEBUG (" Sample" );
809809 struct ggml_init_params params;
810810 size_t data_size = ggml_row_size (init_latent->type , init_latent->ne [0 ]);
@@ -827,7 +827,7 @@ class StableDiffusionGGML {
827827 struct ggml_tensor * noised_input = ggml_dup_tensor (work_ctx, noise);
828828
829829 bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL ;
830- bool has_skiplayer = slg_scale != 0.0 && skip_layers.size () > 0 ;
830+ bool has_skiplayer = slg_params. scale != 0.0 && skip_layers.size () > 0 ;
831831
832832 // denoise wrapper
833833 struct ggml_tensor * out_cond = ggml_dup_tensor (work_ctx, x);
@@ -847,13 +847,8 @@ class StableDiffusionGGML {
847847 }
848848 struct ggml_tensor * denoised = ggml_dup_tensor (work_ctx, x);
849849
850- // TODO do not hardcode
851- float apg_eta = .08f ;
852- float apg_momentum = -.5f ;
853- float apg_norm_treshold = 15 .0f ;
854-
855850 std::vector<float > apg_momentum_buffer;
856- if (apg_momentum != 0 )
851+ if (apg_params. momentum != 0 )
857852 apg_momentum_buffer.resize ((size_t )ggml_nelements (denoised));
858853
859854 auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
@@ -936,7 +931,7 @@ class StableDiffusionGGML {
936931 }
937932
938933 int step_count = sigmas.size ();
939- bool is_skiplayer_step = has_skiplayer && step > (int )(skip_layer_start * step_count) && step < (int )(skip_layer_end * step_count);
934+ bool is_skiplayer_step = has_skiplayer && step > (int )(slg_params. skip_layer_start * step_count) && step < (int )(slg_params. skip_layer_end * step_count);
940935 float * skip_layer_data = NULL ;
941936 if (is_skiplayer_step) {
942937 LOG_DEBUG (" Skipping layers at step %d\n " , step);
@@ -970,37 +965,37 @@ class StableDiffusionGGML {
970965 float dot = 0 ;
971966 for (int i = 0 ; i < ne_elements; i++) {
972967 float delta = positive_data[i] - negative_data[i];
973- if (apg_momentum != 0 ) {
974- delta += apg_momentum * apg_momentum_buffer[i];
968+ if (apg_params. momentum != 0 ) {
969+ delta += apg_params. momentum * apg_momentum_buffer[i];
975970 apg_momentum_buffer[i] = delta;
976971 }
977- if (apg_norm_treshold > 0 ) {
972+ if (apg_params. norm_treshold > 0 ) {
978973 diff_norm += delta * delta;
979974 }
980- if (apg_eta != 1 .0f ) {
975+ if (apg_params. eta != 1 .0f ) {
981976 cond_norm_sq += positive_data[i] * positive_data[i];
982977 dot += positive_data[i] * delta;
983978 }
984979 deltas[i] = delta;
985980 }
986- if (apg_norm_treshold > 0 ) {
981+ if (apg_params. norm_treshold > 0 ) {
987982 diff_norm = std::sqrtf (diff_norm);
988- apg_scale_factor = std::min (1 .0f , apg_norm_treshold / diff_norm);
983+ apg_scale_factor = std::min (1 .0f , apg_params. norm_treshold / diff_norm);
989984 }
990- if (apg_eta != 1 .0f ) {
985+ if (apg_params. eta != 1 .0f ) {
991986 dot *= apg_scale_factor;
992987 // pre-normalize (avoids one square root and ne_elements extra divs)
993988 dot /= cond_norm_sq;
994989 }
995990
996991 for (int i = 0 ; i < ne_elements; i++) {
997992 deltas[i] *= apg_scale_factor;
998- if (apg_eta != 1 .0f ) {
993+ if (apg_params. eta != 1 .0f ) {
999994 float apg_parallel = dot * positive_data[i];
1000995 float apg_orthogonal = deltas[i] - apg_parallel;
1001996
1002997 // tweak deltas
1003- deltas[i] = apg_orthogonal + apg_eta * apg_parallel;
998+ deltas[i] = apg_orthogonal + apg_params. eta * apg_parallel;
1004999 }
10051000 }
10061001
@@ -1019,7 +1014,7 @@ class StableDiffusionGGML {
10191014 }
10201015 }
10211016 if (is_skiplayer_step) {
1022- latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale ;
1017+ latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_params. scale ;
10231018 }
10241019 // v = latent_result, eps = latent_result
10251020 // denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
@@ -1265,11 +1260,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
12651260 float style_ratio,
12661261 bool normalize_input,
12671262 std::string input_id_images_path,
1268- std::vector<int > skip_layers = {},
1269- float slg_scale = 0 ,
1270- float skip_layer_start = 0.01 ,
1271- float skip_layer_end = 0.2 ,
1272- ggml_tensor* masked_image = NULL ) {
1263+ sd_slg_params_t slg_params,
1264+ sd_apg_params_t apg_params,
1265+ ggml_tensor* masked_image = NULL ) {
12731266 if (seed < 0 ) {
12741267 // Generally, when using the provided command line, the seed is always >0.
12751268 // However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@@ -1522,10 +1515,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15221515 sigmas,
15231516 start_merge_step,
15241517 id_cond,
1525- skip_layers,
1526- slg_scale,
1527- skip_layer_start,
1528- skip_layer_end,
1518+ slg_params,
1519+ apg_params,
15291520 noise_mask);
15301521
15311522 // struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
@@ -1595,12 +1586,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
15951586 float style_ratio,
15961587 bool normalize_input,
15971588 const char * input_id_images_path_c_str,
1598- int * skip_layers = NULL ,
1599- size_t skip_layers_count = 0 ,
1600- float slg_scale = 0 ,
1601- float skip_layer_start = 0.01 ,
1602- float skip_layer_end = 0.2 ) {
1603- std::vector<int > skip_layers_vec (skip_layers, skip_layers + skip_layers_count);
1589+ sd_slg_params_t slg_params,
1590+ sd_apg_params_t apg_params) {
16041591 LOG_DEBUG (" txt2img %dx%d" , width, height);
16051592 if (sd_ctx == NULL ) {
16061593 return NULL ;
@@ -1674,10 +1661,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16741661 style_ratio,
16751662 normalize_input,
16761663 input_id_images_path_c_str,
1677- skip_layers_vec,
1678- slg_scale,
1679- skip_layer_start,
1680- skip_layer_end);
1664+ slg_params,
1665+ apg_params);
16811666
16821667 size_t t1 = ggml_time_ms ();
16831668
@@ -1707,12 +1692,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
17071692 float style_ratio,
17081693 bool normalize_input,
17091694 const char * input_id_images_path_c_str,
1710- int * skip_layers = NULL ,
1711- size_t skip_layers_count = 0 ,
1712- float slg_scale = 0 ,
1713- float skip_layer_start = 0.01 ,
1714- float skip_layer_end = 0.2 ) {
1715- std::vector<int > skip_layers_vec (skip_layers, skip_layers + skip_layers_count);
1695+ sd_slg_params_t slg_params,
1696+ sd_apg_params_t apg_params) {
17161697 LOG_DEBUG (" img2img %dx%d" , width, height);
17171698 if (sd_ctx == NULL ) {
17181699 return NULL ;
@@ -1854,10 +1835,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
18541835 style_ratio,
18551836 normalize_input,
18561837 input_id_images_path_c_str,
1857- skip_layers_vec,
1858- slg_scale,
1859- skip_layer_start,
1860- skip_layer_end,
1838+ slg_params,
1839+ apg_params,
18611840 masked_image);
18621841
18631842 size_t t2 = ggml_time_ms ();
0 commit comments