Commit df77a12

refactor(backend): use torchvision transforms for Kontext image preprocessing
Replace numpy-based normalization with torchvision transforms for consistency with other image processing in the codebase
1 parent faf662d commit df77a12

File tree

1 file changed: 12 additions, 6 deletions

invokeai/backend/flux/extensions/kontext_extension.py

Lines changed: 12 additions & 6 deletions
@@ -1,6 +1,5 @@
-import einops
-import numpy as np
 import torch
+import torchvision.transforms as T
 from einops import repeat
 from PIL import Image
 
@@ -136,10 +135,17 @@ def _prepare_kontext(self) -> tuple[torch.Tensor, torch.Tensor]:
         # Use BICUBIC for smoother resizing to reduce artifacts
         image = image.resize((final_width, final_height), Image.Resampling.BICUBIC)
 
-        # Convert to tensor with same normalization as BFL
-        image_np = np.array(image)
-        image_tensor = torch.from_numpy(image_np).float() / 127.5 - 1.0
-        image_tensor = einops.rearrange(image_tensor, "h w c -> 1 c h w")
+        # Convert to tensor using torchvision transforms for consistency
+        # This matches the normalization used in image_resized_to_grid_as_tensor
+        transformation = T.Compose(
+            [
+                T.ToTensor(),  # Converts PIL image to tensor and scales to [0, 1]
+            ]
+        )
+        image_tensor = transformation(image)
+        # Convert from [0, 1] to [-1, 1] range expected by VAE
+        image_tensor = image_tensor * 2.0 - 1.0
+        image_tensor = image_tensor.unsqueeze(0)  # Add batch dimension
         image_tensor = image_tensor.to(self._device)
 
         # Continue with VAE encoding
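
For context on the normalization change: T.ToTensor() scales uint8 pixels to [0, 1], so (x / 255) * 2 - 1 matches the removed x / 127.5 - 1. The minimal standalone sketch below (not part of the commit) checks that equivalence; demo_image is a hypothetical stand-in for the resized Kontext reference image.

# Standalone equivalence check between the removed numpy path and the new
# torchvision path. `demo_image` is a hypothetical placeholder image.
import einops
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image

demo_image = Image.new("RGB", (64, 64), color=(128, 64, 200))

# Old path: scale uint8 pixels to [-1, 1] with numpy, then HWC -> 1CHW.
old = torch.from_numpy(np.array(demo_image)).float() / 127.5 - 1.0
old = einops.rearrange(old, "h w c -> 1 c h w")

# New path: ToTensor gives a CHW float tensor in [0, 1]; rescale to [-1, 1].
new = T.ToTensor()(demo_image) * 2.0 - 1.0
new = new.unsqueeze(0)

# The two differ only by floating-point rounding in the scaling order.
assert torch.allclose(old, new, atol=1e-4)

Either path yields a (1, 3, H, W) float tensor in [-1, 1]; per the commit message, the torchvision version is preferred for consistency with the rest of the codebase's image preprocessing.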
