@@ -168,24 +168,21 @@ struct AYSSchedule : SigmaSchedule {
168168 std::vector<float > inputs;
169169 std::vector<float > results (n + 1 );
170170
171- switch (version) {
172- case VERSION_SD2: /* fallthrough */
173- LOG_WARN (" AYS not designed for SD2.X models" );
174- case VERSION_SD1:
175- LOG_INFO (" AYS using SD1.5 noise levels" );
176- inputs = noise_levels[0 ];
177- break ;
178- case VERSION_SDXL:
179- LOG_INFO (" AYS using SDXL noise levels" );
180- inputs = noise_levels[1 ];
181- break ;
182- case VERSION_SVD:
183- LOG_INFO (" AYS using SVD noise levels" );
184- inputs = noise_levels[2 ];
185- break ;
186- default :
187- LOG_ERROR (" Version not compatable with AYS scheduler" );
188- return results;
171+ if (sd_version_is_sd2 ((SDVersion)version)) {
172+ LOG_WARN (" AYS not designed for SD2.X models" );
173+ } /* fallthrough */
174+ else if (sd_version_is_sd1 ((SDVersion)version)) {
175+ LOG_INFO (" AYS using SD1.5 noise levels" );
176+ inputs = noise_levels[0 ];
177+ } else if (sd_version_is_sdxl ((SDVersion)version)) {
178+ LOG_INFO (" AYS using SDXL noise levels" );
179+ inputs = noise_levels[1 ];
180+ } else if (version == VERSION_SVD) {
181+ LOG_INFO (" AYS using SVD noise levels" );
182+ inputs = noise_levels[2 ];
183+ } else {
184+ LOG_ERROR (" Version not compatable with AYS scheduler" );
185+ return results;
189186 }
190187
191188 /* Stretches those pre-calculated reference levels out to the desired
@@ -346,6 +343,31 @@ struct CompVisVDenoiser : public CompVisDenoiser {
346343 }
347344};
348345
346+ struct EDMVDenoiser : public CompVisVDenoiser {
347+ float min_sigma = 0.002 ;
348+ float max_sigma = 120.0 ;
349+
350+ EDMVDenoiser (float min_sigma = 0.002 , float max_sigma = 120.0 ) : min_sigma(min_sigma), max_sigma(max_sigma) {
351+ schedule = std::make_shared<ExponentialSchedule>();
352+ }
353+
354+ float t_to_sigma (float t) {
355+ return std::exp (t * 4 /(float )TIMESTEPS);
356+ }
357+
358+ float sigma_to_t (float s) {
359+ return 0.25 * std::log (s);
360+ }
361+
362+ float sigma_min () {
363+ return min_sigma;
364+ }
365+
366+ float sigma_max () {
367+ return max_sigma;
368+ }
369+ };
370+
349371float time_snr_shift (float alpha, float t) {
350372 if (alpha == 1 .0f ) {
351373 return t;
@@ -1019,7 +1041,7 @@ static void sample_k_diffusion(sample_method_t method,
10191041 // also needed to invert the behavior of CompVisDenoiser
10201042 // (k-diffusion's LMSDiscreteScheduler)
10211043 float beta_start = 0 .00085f ;
1022- float beta_end = 0 .0120f ;
1044+ float beta_end = 0 .0120f ;
10231045 std::vector<double > alphas_cumprod;
10241046 std::vector<double > compvis_sigmas;
10251047
@@ -1030,8 +1052,9 @@ static void sample_k_diffusion(sample_method_t method,
10301052 (i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
10311053 (1 .0f -
10321054 std::pow (sqrtf (beta_start) +
1033- (sqrtf (beta_end) - sqrtf (beta_start)) *
1034- ((float )i / (TIMESTEPS - 1 )), 2 ));
1055+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1056+ ((float )i / (TIMESTEPS - 1 )),
1057+ 2 ));
10351058 compvis_sigmas[i] =
10361059 std::sqrt ((1 - alphas_cumprod[i]) /
10371060 alphas_cumprod[i]);
@@ -1061,7 +1084,8 @@ static void sample_k_diffusion(sample_method_t method,
10611084 // - pred_prev_sample -> "x_t-1"
10621085 int timestep =
10631086 roundf (TIMESTEPS -
1064- i * ((float )TIMESTEPS / steps)) - 1 ;
1087+ i * ((float )TIMESTEPS / steps)) -
1088+ 1 ;
10651089 // 1. get previous step value (=t-1)
10661090 int prev_timestep = timestep - TIMESTEPS / steps;
10671091 // The sigma here is chosen to cause the
@@ -1086,10 +1110,9 @@ static void sample_k_diffusion(sample_method_t method,
10861110 float * vec_x = (float *)x->data ;
10871111 for (int j = 0 ; j < ggml_nelements (x); j++) {
10881112 vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1089- sigma;
1113+ sigma;
10901114 }
1091- }
1092- else {
1115+ } else {
10931116 // For the subsequent steps after the first one,
10941117 // at this point x = latents or x = sample, and
10951118 // needs to be prescaled with x <- sample / c_in
@@ -1127,9 +1150,8 @@ static void sample_k_diffusion(sample_method_t method,
11271150 float alpha_prod_t = alphas_cumprod[timestep];
11281151 // Note final_alpha_cumprod = alphas_cumprod[0] due to
11291152 // trailing timestep spacing
1130- float alpha_prod_t_prev = prev_timestep >= 0 ?
1131- alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1132- float beta_prod_t = 1 - alpha_prod_t ;
1153+ float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1154+ float beta_prod_t = 1 - alpha_prod_t ;
11331155 // 3. compute predicted original sample from predicted
11341156 // noise also called "predicted x_0" of formula (12)
11351157 // from https://arxiv.org/pdf/2010.02502.pdf
@@ -1145,7 +1167,7 @@ static void sample_k_diffusion(sample_method_t method,
11451167 vec_pred_original_sample[j] =
11461168 (vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
11471169 std::sqrt (beta_prod_t ) *
1148- vec_model_output[j]) *
1170+ vec_model_output[j]) *
11491171 (1 / std::sqrt (alpha_prod_t ));
11501172 }
11511173 }
@@ -1159,8 +1181,8 @@ static void sample_k_diffusion(sample_method_t method,
11591181 // sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
11601182 // sqrt(1 - alpha_t/alpha_t-1)
11611183 float beta_prod_t_prev = 1 - alpha_prod_t_prev;
1162- float variance = (beta_prod_t_prev / beta_prod_t ) *
1163- (1 - alpha_prod_t / alpha_prod_t_prev);
1184+ float variance = (beta_prod_t_prev / beta_prod_t ) *
1185+ (1 - alpha_prod_t / alpha_prod_t_prev);
11641186 float std_dev_t = eta * std::sqrt (variance);
11651187 // 6. compute "direction pointing to x_t" of formula
11661188 // (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1179,8 +1201,8 @@ static void sample_k_diffusion(sample_method_t method,
11791201 std::pow (std_dev_t , 2 )) *
11801202 vec_model_output[j];
11811203 vec_x[j] = std::sqrt (alpha_prod_t_prev) *
1182- vec_pred_original_sample[j] +
1183- pred_sample_direction;
1204+ vec_pred_original_sample[j] +
1205+ pred_sample_direction;
11841206 }
11851207 }
11861208 if (eta > 0 ) {
@@ -1208,7 +1230,7 @@ static void sample_k_diffusion(sample_method_t method,
12081230 // by Semi-Linear Consistency Function with Trajectory
12091231 // Mapping", arXiv:2402.19159 [cs.CV]
12101232 float beta_start = 0 .00085f ;
1211- float beta_end = 0 .0120f ;
1233+ float beta_end = 0 .0120f ;
12121234 std::vector<double > alphas_cumprod;
12131235 std::vector<double > compvis_sigmas;
12141236
@@ -1219,8 +1241,9 @@ static void sample_k_diffusion(sample_method_t method,
12191241 (i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
12201242 (1 .0f -
12211243 std::pow (sqrtf (beta_start) +
1222- (sqrtf (beta_end) - sqrtf (beta_start)) *
1223- ((float )i / (TIMESTEPS - 1 )), 2 ));
1244+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1245+ ((float )i / (TIMESTEPS - 1 )),
1246+ 2 ));
12241247 compvis_sigmas[i] =
12251248 std::sqrt ((1 - alphas_cumprod[i]) /
12261249 alphas_cumprod[i]);
@@ -1235,13 +1258,10 @@ static void sample_k_diffusion(sample_method_t method,
12351258 for (int i = 0 ; i < steps; i++) {
12361259 // Analytic form for TCD timesteps
12371260 int timestep = TIMESTEPS - 1 -
1238- (TIMESTEPS / original_steps) *
1239- (int )floor (i * ((float )original_steps / steps));
1261+ (TIMESTEPS / original_steps) *
1262+ (int )floor (i * ((float )original_steps / steps));
12401263 // 1. get previous step value
1241- int prev_timestep = i >= steps - 1 ? 0 :
1242- TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1243- (int )floor ((i + 1 ) *
1244- ((float )original_steps / steps));
1264+ int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int )floor ((i + 1 ) * ((float )original_steps / steps));
12451265 // Here timestep_s is tau_n' in Algorithm 4. The _s
12461266 // notation appears to be that from C. Lu,
12471267 // "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1258,10 +1278,9 @@ static void sample_k_diffusion(sample_method_t method,
12581278 float * vec_x = (float *)x->data ;
12591279 for (int j = 0 ; j < ggml_nelements (x); j++) {
12601280 vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1261- sigma;
1281+ sigma;
12621282 }
1263- }
1264- else {
1283+ } else {
12651284 float * vec_x = (float *)x->data ;
12661285 for (int j = 0 ; j < ggml_nelements (x); j++) {
12671286 vec_x[j] *= std::sqrt (sigma * sigma + 1 );
@@ -1294,15 +1313,14 @@ static void sample_k_diffusion(sample_method_t method,
12941313 // DPM-Solver. In fact, we have alpha_{t_n} =
12951314 // \sqrt{\hat{alpha_n}}, [...]"
12961315 float alpha_prod_t = alphas_cumprod[timestep];
1297- float beta_prod_t = 1 - alpha_prod_t ;
1316+ float beta_prod_t = 1 - alpha_prod_t ;
12981317 // Note final_alpha_cumprod = alphas_cumprod[0] since
12991318 // TCD is always "trailing"
1300- float alpha_prod_t_prev = prev_timestep >= 0 ?
1301- alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1319+ float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
13021320 // The subscript _s are the only portion in this
13031321 // section (2) unique to TCD
13041322 float alpha_prod_s = alphas_cumprod[timestep_s];
1305- float beta_prod_s = 1 - alpha_prod_s;
1323+ float beta_prod_s = 1 - alpha_prod_s;
13061324 // 3. Compute the predicted noised sample x_s based on
13071325 // the model parameterization
13081326 //
@@ -1317,7 +1335,7 @@ static void sample_k_diffusion(sample_method_t method,
13171335 vec_pred_original_sample[j] =
13181336 (vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
13191337 std::sqrt (beta_prod_t ) *
1320- vec_model_output[j]) *
1338+ vec_model_output[j]) *
13211339 (1 / std::sqrt (alpha_prod_t ));
13221340 }
13231341 }
@@ -1339,9 +1357,9 @@ static void sample_k_diffusion(sample_method_t method,
13391357 // pred_epsilon = model_output
13401358 vec_x[j] =
13411359 std::sqrt (alpha_prod_s) *
1342- vec_pred_original_sample[j] +
1360+ vec_pred_original_sample[j] +
13431361 std::sqrt (beta_prod_s) *
1344- vec_model_output[j];
1362+ vec_model_output[j];
13451363 }
13461364 }
13471365 // 4. Sample and inject noise z ~ N(0, I) for
@@ -1357,7 +1375,7 @@ static void sample_k_diffusion(sample_method_t method,
13571375 // In this case, x is still pred_noised_sample,
13581376 // continue in-place
13591377 ggml_tensor_set_f32_randn (noise, rng);
1360- float * vec_x = (float *)x->data ;
1378+ float * vec_x = (float *)x->data ;
13611379 float * vec_noise = (float *)noise->data ;
13621380 for (int j = 0 ; j < ggml_nelements (x); j++) {
13631381 // Corresponding to (35) in Zheng et
@@ -1366,10 +1384,10 @@ static void sample_k_diffusion(sample_method_t method,
13661384 vec_x[j] =
13671385 std::sqrt (alpha_prod_t_prev /
13681386 alpha_prod_s) *
1369- vec_x[j] +
1387+ vec_x[j] +
13701388 std::sqrt (1 - alpha_prod_t_prev /
1371- alpha_prod_s) *
1372- vec_noise[j];
1389+ alpha_prod_s) *
1390+ vec_noise[j];
13731391 }
13741392 }
13751393 }
0 commit comments