Set VAE tile size via the SD_TILE_SIZE environment variable

stduhpf · stduhpf · commit dc990a7ca289 · 2025-09-11T12:13:03.000+02:00
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -1298,14 +1298,27 @@ class StableDiffusionGGML {
     ggml_tensor* encode_first_stage(ggml_context* work_ctx, ggml_tensor* x, bool decode_video = false) {
         int64_t t0          = ggml_time_ms();
         ggml_tensor* result = NULL;
+        int tile_size = 32;
+        // TODO: arg instead of env?
+        const char* SD_TILE_SIZE = getenv("SD_TILE_SIZE");
+        if (SD_TILE_SIZE != nullptr) {
+            std::string sd_tile_size_str = SD_TILE_SIZE;
+            try {
+                tile_size = std::stoi(sd_tile_size_str);
+            } catch (const std::invalid_argument&) {
+                LOG_WARN("Invalid");
+            } catch (const std::out_of_range&) {
+                LOG_WARN("OOR");
+            }
+        }
         if (!use_tiny_autoencoder) {
             process_vae_input_tensor(x);
             if (vae_tiling && !decode_video) {
                 // split latent in 32x32 tiles and compute in several steps
                 auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
                     first_stage_model->compute(n_threads, in, true, &out, NULL);
                 };
-                sd_tiling(x, result, 8, 32, 0.5f, on_tiling, false);
+                sd_tiling(x, result, 8, tile_size, 0.5f, on_tiling, false);
             } else {
                 first_stage_model->compute(n_threads, x, false, &result, work_ctx);
             }
@@ -1428,7 +1441,19 @@ class StableDiffusionGGML {
                                         C,
                                         x->ne[3]);
         }
-
+        int tile_size = 32;
+        // TODO: arg instead of env?
+        const char* SD_TILE_SIZE = getenv("SD_TILE_SIZE");
+        if (SD_TILE_SIZE != nullptr) {
+            std::string sd_tile_size_str = SD_TILE_SIZE;
+            try {
+                tile_size = std::stoi(sd_tile_size_str);
+            } catch (const std::invalid_argument&) {
+                LOG_WARN("Invalid");
+            } catch (const std::out_of_range&) {
+                LOG_WARN("OOR");
+            }
+        }
         int64_t t0 = ggml_time_ms();
         if (!use_tiny_autoencoder) {
             process_latent_out(x);
@@ -1438,7 +1463,7 @@ class StableDiffusionGGML {
                 auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
                     first_stage_model->compute(n_threads, in, true, &out, NULL);
                 };
-                sd_tiling(x, result, 8, 32, 0.5f, on_tiling, true);
+                sd_tiling(x, result, 8, tile_size, 0.5f, on_tiling, true);
             } else {
                 first_stage_model->compute(n_threads, x, true, &result, work_ctx);
             }