Commit cf7fa5b

perf(backend): clear torch cache after encoding each image in kontext extension

Slightly reduces VRAM allocations.

1 parent 39e4199
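For context on why this helps: after each VAE encode, the encoder's temporaries are freed by Python but remain parked in PyTorch's caching allocator, so they still count against the process's VRAM until the cache is cleared. A minimal standalone sketch of the pattern, assuming `TorchDevice.empty_cache()` dispatches to the per-backend cache-clearing call (the `empty_device_cache` and `encode_reference_images` helpers below are illustrative, not the repo's code):

```python
import torch


def empty_device_cache() -> None:
    # Return cached-but-unused allocator blocks to the device. This is
    # presumably what TorchDevice.empty_cache() does per backend.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif torch.backends.mps.is_available():
        torch.mps.empty_cache()


def encode_reference_images(vae, images: list[torch.Tensor]) -> list[torch.Tensor]:
    # Illustrative loop: encode each image, then clear the cache so the
    # encoder's temporaries don't accumulate as cached blocks while the
    # remaining images are encoded.
    latents = []
    for image in images:
        with torch.no_grad():
            latents.append(vae.encode(image))
        empty_device_cache()
    return latents
```

Note that clearing the cache forces the allocator to re-request memory from the driver on the next allocation, so it is worth doing only around large, bursty allocations like VAE encodes.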

1 file changed (+3, -2 lines)

invokeai/backend/flux/extensions/kontext_extension.py

Lines changed: 3 additions & 2 deletions
```diff
@@ -144,6 +144,7 @@ def _prepare_kontext(self) -> tuple[torch.Tensor, torch.Tensor]:
         image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
         # Use sample=False to get the distribution mean without noise
         kontext_latents_unpacked = vae.encode(image_tensor, sample=False)
+        TorchDevice.empty_cache()
 
         # Extract tensor dimensions
         batch_size, _, latent_height, latent_width = kontext_latents_unpacked.shape
@@ -169,11 +170,11 @@ def _prepare_kontext(self) -> tuple[torch.Tensor, torch.Tensor]:
         # Option 1: Tile vertically (below existing content)
         potential_h_vertical = canvas_h + latent_height
         potential_w_vertical = max(canvas_w, latent_width)
-
+
         # Option 2: Tile horizontally (to the right of existing content)
         potential_h_horizontal = max(canvas_h, latent_height)
         potential_w_horizontal = canvas_w + latent_width
-
+
         # Choose arrangement that minimizes the maximum dimension
         # This keeps the canvas closer to square, optimizing attention computation
         if potential_h_vertical > potential_w_horizontal:
```
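The second hunk only strips trailing whitespace from two blank lines, but its context documents the canvas-tiling heuristic used when packing multiple reference latents together. A self-contained sketch reconstructed from that context (the function wrapper and return values are illustrative assumptions; the diff truncates right after the `if`, so the branch bodies are guesses that follow the stated minimize-the-maximum-dimension goal):

```python
def choose_canvas_size(
    canvas_h: int, canvas_w: int, latent_height: int, latent_width: int
) -> tuple[int, int]:
    # Option 1: tile vertically (new latents go below existing content).
    potential_h_vertical = canvas_h + latent_height
    potential_w_vertical = max(canvas_w, latent_width)

    # Option 2: tile horizontally (to the right of existing content).
    potential_h_horizontal = max(canvas_h, latent_height)
    potential_w_horizontal = canvas_w + latent_width

    # Same comparison as the diff context: if stacking vertically would make
    # the canvas taller than tiling horizontally would make it wide, prefer
    # the horizontal arrangement.
    if potential_h_vertical > potential_w_horizontal:
        return potential_h_horizontal, potential_w_horizontal  # tile horizontally
    return potential_h_vertical, potential_w_vertical  # tile vertically
```

As the diff's own comments note, keeping the packed canvas closer to square reduces the padding introduced by the max() terms, and with it the number of latent tokens the attention computation has to process.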

0 commit comments