From ebe5be2b13b8c43e4a0840549167fbfc1631826b Mon Sep 17 00:00:00 2001 From: ethan Date: Sun, 6 Jul 2025 19:41:46 -0700 Subject: [PATCH 01/15] add kontext support --- optimum/intel/__init__.py | 2 ++ optimum/intel/openvino/__init__.py | 1 + optimum/intel/openvino/modeling_diffusion.py | 15 ++++++++++++++- .../utils/dummy_openvino_and_diffusers_objects.py | 10 ++++++++++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py index b49f17944a..cc08e3b09b 100644 --- a/optimum/intel/__init__.py +++ b/optimum/intel/__init__.py @@ -124,6 +124,7 @@ "OVFluxImg2ImgPipeline", "OVFluxInpaintPipeline", "OVFluxFillPipeline", + "OVFluxKontextPipeline", "OVSanaPipeline", "OVPipelineForImage2Image", "OVPipelineForText2Image", @@ -150,6 +151,7 @@ "OVFluxImg2ImgPipeline", "OVFluxInpaintPipeline", "OVFluxFillPipeline", + "OVFluxKontextPipeline", "OVSanaPipeline", "OVPipelineForImage2Image", "OVPipelineForText2Image", diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py index bc1266467b..f9a6f39c52 100644 --- a/optimum/intel/openvino/__init__.py +++ b/optimum/intel/openvino/__init__.py @@ -91,6 +91,7 @@ OVFluxImg2ImgPipeline, OVFluxInpaintPipeline, OVFluxPipeline, + OVFluxKontextPipeline, OVLatentConsistencyModelImg2ImgPipeline, OVLatentConsistencyModelPipeline, OVLTXPipeline, diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index 06321a14ae..b371c89b32 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -120,6 +120,11 @@ from diffusers import SanaSprintPipeline else: SanaSprintPipeline = object + +if is_diffusers_version(">", "0.34.0"): + from diffusers import FluxKontextPipeline +else: + FluxKontextPipeline = object DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer" @@ -1659,12 +1664,15 @@ class OVFluxInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, export_feature = "inpainting" auto_model_class = FluxInpaintPipeline - class OVFluxFillPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxFillPipeline): main_input_name = "image" export_feature = "inpainting" auto_model_class = FluxFillPipeline +class OVFluxKontextPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxKontextPipeline): + main_input_name = "image" + export_feature = "image-to-image" + auto_model_class = FluxKontextPipeline class OVSanaPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, SanaPipeline): main_input_name = "prompt" @@ -1768,6 +1776,11 @@ def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tru if is_diffusers_version(">=", "0.33.0"): SUPPORTED_OV_PIPELINES.append(OVSanaSprintPipeline) OV_TEXT2IMAGE_PIPELINES_MAPPING["sana-sprint"] = OVSanaSprintPipeline + + +if is_diffusers_version(">", "0.34.0"): + SUPPORTED_OV_PIPELINES.extend([OVFluxKontextPipeline]) + OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = OVFluxKontextPipeline SUPPORTED_OV_PIPELINES_MAPPINGS = [ OV_TEXT2IMAGE_PIPELINES_MAPPING, diff --git a/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py b/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py index ed38231e08..4845732922 100644 --- a/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py +++ b/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py @@ -246,6 +246,16 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["openvino", "diffusers"]) +class OVFluxKontextPipeline(metaclass=DummyObject): + 
_backends = ["openvino", "diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["openvino", "diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["openvino", "diffusers"]) + class OVSanaPipeline(metaclass=DummyObject): _backends = ["openvino", "diffusers"] From d2a5a92a210d027e7377973ec3a9cb8387b5a8cc Mon Sep 17 00:00:00 2001 From: ethan Date: Sun, 6 Jul 2025 19:45:46 -0700 Subject: [PATCH 02/15] add kontext support --- optimum/intel/__init__.py | 8 +- optimum/intel/openvino/modeling_diffusion.py | 206 +++++------------- .../dummy_openvino_and_diffusers_objects.py | 3 +- 3 files changed, 53 insertions(+), 164 deletions(-) diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py index cc08e3b09b..5bbf292f3f 100644 --- a/optimum/intel/__init__.py +++ b/optimum/intel/__init__.py @@ -167,9 +167,7 @@ except OptionalDependencyNotAvailable: from .utils import dummy_openvino_objects - _import_structure["utils.dummy_openvino_objects"] = [ - name for name in dir(dummy_openvino_objects) if not name.startswith("_") - ] + _import_structure["utils.dummy_openvino_objects"] = [name for name in dir(dummy_openvino_objects) if not name.startswith("_")] else: _import_structure["openvino"].extend( [ @@ -206,9 +204,7 @@ except OptionalDependencyNotAvailable: from .utils import dummy_neural_compressor_objects - _import_structure["utils.dummy_neural_compressor_objects"] = [ - name for name in dir(dummy_neural_compressor_objects) if not name.startswith("_") - ] + _import_structure["utils.dummy_neural_compressor_objects"] = [name for name in dir(dummy_neural_compressor_objects) if not name.startswith("_")] else: _import_structure["neural_compressor"] = [ "INCConfig", diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index b371c89b32..907cd4b158 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -120,7 +120,7 @@ from diffusers import SanaSprintPipeline else: SanaSprintPipeline = object - + if is_diffusers_version(">", "0.34.0"): from diffusers import FluxKontextPipeline else: @@ -201,35 +201,15 @@ def __init__( ) self.unet = OVModelUnet(unet, self, DIFFUSION_MODEL_UNET_SUBFOLDER) if unet is not None else None - self.transformer = ( - OVModelTransformer(transformer, self, DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER) - if transformer is not None - else None - ) + self.transformer = OVModelTransformer(transformer, self, DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER) if transformer is not None else None if unet is None and transformer is None: raise ValueError("`unet` or `transformer` model should be provided for pipeline work") self.vae_decoder = OVModelVaeDecoder(vae_decoder, self, DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER) - self.vae_encoder = ( - OVModelVaeEncoder(vae_encoder, self, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER) - if vae_encoder is not None - else None - ) - self.text_encoder = ( - OVModelTextEncoder(text_encoder, self, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER) - if text_encoder is not None - else None - ) - self.text_encoder_2 = ( - OVModelTextEncoder(text_encoder_2, self, DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER) - if text_encoder_2 is not None - else None - ) - self.text_encoder_3 = ( - OVModelTextEncoder(text_encoder_3, self, DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER) - if text_encoder_3 is not None - else None - ) + self.vae_encoder = OVModelVaeEncoder(vae_encoder, self, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER) if vae_encoder is 
not None else None + self.text_encoder = OVModelTextEncoder(text_encoder, self, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER) if text_encoder is not None else None + self.text_encoder_2 = OVModelTextEncoder(text_encoder_2, self, DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER) if text_encoder_2 is not None else None + self.text_encoder_3 = OVModelTextEncoder(text_encoder_3, self, DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER) if text_encoder_3 is not None else None # We wrap the VAE Decoder & Encoder in a single object to simulate diffusers API self.vae = OVModelVae(decoder=self.vae_decoder, encoder=self.vae_encoder) @@ -310,9 +290,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]): The directory where to save the model files """ if self._compile_only: - raise ValueError( - "`save_pretrained()` is not supported with `compile_only` mode, please initialize model without this option" - ) + raise ValueError("`save_pretrained()` is not supported with `compile_only` mode, please initialize model without this option") save_directory = Path(save_directory) @@ -330,11 +308,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]): dst_path = save_path / OV_XML_FILE_NAME dst_path.parent.mkdir(parents=True, exist_ok=True) openvino.save_model(model.model, dst_path, compress_to_fp16=False) - model_dir = ( - self.model_save_dir - if not isinstance(self.model_save_dir, TemporaryDirectory) - else self.model_save_dir.name - ) + model_dir = self.model_save_dir if not isinstance(self.model_save_dir, TemporaryDirectory) else self.model_save_dir.name config_path = Path(model_dir) / save_path.name / CONFIG_NAME if config_path.is_file(): config_save_path = save_path / CONFIG_NAME @@ -365,11 +339,7 @@ def _save_config(self, save_directory): Saves a model configuration into a directory, so that it can be re-loaded using the [`from_pretrained`] class method. """ - model_dir = ( - self.model_save_dir - if not isinstance(self.model_save_dir, TemporaryDirectory) - else self.model_save_dir.name - ) + model_dir = self.model_save_dir if not isinstance(self.model_save_dir, TemporaryDirectory) else self.model_save_dir.name save_dir = Path(save_directory) original_config = Path(model_dir) / self.config_name if original_config.exists(): @@ -527,11 +497,7 @@ def _from_pretrained( ov_config = kwargs.get("ov_config", {}) device = kwargs.get("device", "CPU") vae_ov_conifg = {**ov_config} - if ( - "GPU" in device.upper() - and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg - and is_openvino_version("<=", "2025.0") - ): + if "GPU" in device.upper() and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg and is_openvino_version("<=", "2025.0"): vae_model_path = models["vae_decoder"] required_upcast = check_scale_available(vae_model_path) if required_upcast: @@ -672,15 +638,10 @@ def to(self, *args, device: Optional[str] = None, dtype: Optional[torch.dtype] = self._device = device.upper() self.clear_requests() elif device is not None: - raise ValueError( - "The `device` argument should be a string representing the device on which the model should be loaded." - ) + raise ValueError("The `device` argument should be a string representing the device on which the model should be loaded.") if dtype is not None and dtype != self.dtype: - raise NotImplementedError( - f"Cannot change the dtype of the model from {self.dtype} to {dtype}. " - f"Please export the model with the desired dtype." - ) + raise NotImplementedError(f"Cannot change the dtype of the model from {self.dtype} to {dtype}. 
" f"Please export the model with the desired dtype.") return self @@ -690,9 +651,7 @@ def height(self) -> int: height = model.inputs[0].get_partial_shape()[-2] if height.is_dynamic: return -1 - return height.get_length() * ( - self.vae_scale_factor if hasattr(self, "vae_scale_factor") else self.vae_spatial_compression_ratio - ) + return height.get_length() * (self.vae_scale_factor if hasattr(self, "vae_scale_factor") else self.vae_spatial_compression_ratio) @property def width(self) -> int: @@ -700,9 +659,7 @@ def width(self) -> int: width = model.inputs[0].get_partial_shape()[-1] if width.is_dynamic: return -1 - return width.get_length() * ( - self.vae_scale_factor if hasattr(self, "vae_scale_factor") else self.vae_spatial_compression_ratio - ) + return width.get_length() * (self.vae_scale_factor if hasattr(self, "vae_scale_factor") else self.vae_spatial_compression_ratio) @property def batch_size(self) -> int: @@ -798,9 +755,7 @@ def _reshape_transformer( elif inputs.get_any_name() == "hidden_states": in_channels = self.transformer.config.get("in_channels", None) if in_channels is None: - in_channels = ( - shapes[inputs][1] if inputs.get_partial_shape().rank.get_length() == 4 else shapes[inputs][2] - ) + in_channels = shapes[inputs][1] if inputs.get_partial_shape().rank.get_length() == 4 else shapes[inputs][2] if in_channels.is_dynamic: logger.warning( "Could not identify `in_channels` from the unet configuration, to statically reshape the unet please provide a configuration." @@ -814,11 +769,7 @@ def _reshape_transformer( elif inputs.get_any_name() == "pooled_projections": shapes[inputs] = [batch_size, self.transformer.config["pooled_projection_dim"]] elif inputs.get_any_name() == "img_ids": - shapes[inputs] = ( - [batch_size, packed_height_width, 3] - if is_diffusers_version("<", "0.31.0") - else [packed_height_width, 3] - ) + shapes[inputs] = [batch_size, packed_height_width, 3] if is_diffusers_version("<", "0.31.0") else [packed_height_width, 3] elif inputs.get_any_name() == "txt_ids": shapes[inputs] = [batch_size, -1, 3] if is_diffusers_version("<", "0.31.0") else [-1, 3] elif inputs.get_any_name() in ["height", "width", "num_frames", "rope_interpolation_scale"]: @@ -892,9 +843,7 @@ def _reshape_vae_decoder( def reshape(self, batch_size: int, height: int, width: int, num_images_per_prompt: int = -1, num_frames: int = -1): if self._compile_only: - raise ValueError( - "`reshape()` is not supported with `compile_only` mode, please initialize model without this option" - ) + raise ValueError("`reshape()` is not supported with `compile_only` mode, please initialize model without this option") self.is_dynamic = -1 in {batch_size, height, width, num_images_per_prompt} @@ -905,15 +854,11 @@ def reshape(self, batch_size: int, height: int, width: int, num_images_per_promp tokenizer_max_len = -1 else: tokenizer_max_len = ( - getattr(self.tokenizer, "model_max_length", -1) - if self.tokenizer is not None - else getattr(self.tokenizer_2, "model_max_length", -1) + getattr(self.tokenizer, "model_max_length", -1) if self.tokenizer is not None else getattr(self.tokenizer_2, "model_max_length", -1) ) if self.unet is not None: - self.unet.model = self._reshape_unet( - self.unet.model, batch_size, height, width, num_images_per_prompt, tokenizer_max_len - ) + self.unet.model = self._reshape_unet(self.unet.model, batch_size, height, width, num_images_per_prompt, tokenizer_max_len) if self.transformer is not None: self.transformer.model = self._reshape_transformer( self.transformer.model, @@ -924,14 
+869,10 @@ def reshape(self, batch_size: int, height: int, width: int, num_images_per_promp tokenizer_max_len, num_frames=num_frames, ) - self.vae_decoder.model = self._reshape_vae_decoder( - self.vae_decoder.model, height, width, num_images_per_prompt, num_frames=num_frames - ) + self.vae_decoder.model = self._reshape_vae_decoder(self.vae_decoder.model, height, width, num_images_per_prompt, num_frames=num_frames) if self.vae_encoder is not None: - self.vae_encoder.model = self._reshape_vae_encoder( - self.vae_encoder.model, batch_size, height, width, num_frames=num_frames - ) + self.vae_encoder.model = self._reshape_vae_encoder(self.vae_encoder.model, batch_size, height, width, num_frames=num_frames) if self.text_encoder is not None: self.text_encoder.model = self._reshape_text_encoder( @@ -940,16 +881,13 @@ def reshape(self, batch_size: int, height: int, width: int, num_images_per_promp batch_size, ( getattr(self.tokenizer, "model_max_length", -1) - if "Gemma" not in self.tokenizer.__class__.__name__ - and not self.__class__.__name__.startswith("OVLTX") + if "Gemma" not in self.tokenizer.__class__.__name__ and not self.__class__.__name__.startswith("OVLTX") else -1 ), ) if self.text_encoder_2 is not None: - self.text_encoder_2.model = self._reshape_text_encoder( - self.text_encoder_2.model, batch_size, getattr(self.tokenizer_2, "model_max_length", -1) - ) + self.text_encoder_2.model = self._reshape_text_encoder(self.text_encoder_2.model, batch_size, getattr(self.tokenizer_2, "model_max_length", -1)) if self.text_encoder_3 is not None: self.text_encoder_3.model = self._reshape_text_encoder(self.text_encoder_3.model, batch_size, -1) @@ -962,9 +900,7 @@ def half(self): Converts all the model weights to FP16 for more efficient inference on GPU. """ if self._compile_only: - raise ValueError( - "`half()` is not supported with `compile_only` mode, please initialize model without this option" - ) + raise ValueError("`half()` is not supported with `compile_only` mode, please initialize model without this option") for submodel in self.ov_submodels.values(): compress_model_transformation(submodel) @@ -975,9 +911,7 @@ def half(self): def clear_requests(self): if self._compile_only: - raise ValueError( - "`clear_requests()` is not supported with `compile_only` mode, please initialize model without this option" - ) + raise ValueError("`clear_requests()` is not supported with `compile_only` mode, please initialize model without this option") for submodel_name in self._ov_submodel_names: getattr(self, submodel_name).request = None @@ -1064,9 +998,7 @@ def __call__(self, *args, **kwargs): # Disable this behavior for static shape pipeline if self.auto_model_class.__name__.startswith("Sana") and shapes_overridden: sig_resolution_bining_idx = ( - list(sig.parameters).index("use_resolution_binning") - if "use_resolution_binning" in sig.parameters - else len(sig.parameters) + list(sig.parameters).index("use_resolution_binning") if "use_resolution_binning" in sig.parameters else len(sig.parameters) ) if len(args) > sig_resolution_bining_idx: args[sig_resolution_bining_idx] = False @@ -1120,11 +1052,7 @@ def dtype(self) -> torch.dtype: def _compile(self): if self.request is None: - if ( - "CACHE_DIR" not in self.ov_config.keys() - and not str(self.model_save_dir).startswith(gettempdir()) - and "GPU" in self._device - ): + if "CACHE_DIR" not in self.ov_config.keys() and not str(self.model_save_dir).startswith(gettempdir()) and "GPU" in self._device: self.ov_config["CACHE_DIR"] = os.path.join(self.model_save_dir, 
"model_cache") logger.info(f"Compiling the {self.model_name} to {self._device} ...") @@ -1144,15 +1072,10 @@ def to(self, *args, device: Optional[str] = None, dtype: Optional[torch.dtype] = self._device = device.upper() self.request = None elif device is not None: - raise ValueError( - "The `device` argument should be a string representing the device on which the model should be loaded." - ) + raise ValueError("The `device` argument should be a string representing the device on which the model should be loaded.") if dtype is not None and dtype != self.dtype: - raise NotImplementedError( - f"Cannot change the dtype of the model from {self.dtype} to {dtype}. " - f"Please export the model with the desired dtype." - ) + raise NotImplementedError(f"Cannot change the dtype of the model from {self.dtype} to {dtype}. " f"Please export the model with the desired dtype.") return self @@ -1170,9 +1093,7 @@ def modules(self): class OVModelTextEncoder(OVPipelinePart): def __init__(self, model: openvino.Model, parent_pipeline: OVDiffusionPipeline, model_name: str = ""): super().__init__(model, parent_pipeline, model_name) - self.hidden_states_output_names = [ - name for out in self.model.outputs for name in out.names if name.startswith("hidden_states") - ] + self.hidden_states_output_names = [name for out in self.model.outputs for name in out.names if name.startswith("hidden_states")] self.input_names = [inp.get_any_name() for inp in self.model.inputs] def forward( @@ -1196,11 +1117,7 @@ def forward( model_outputs["pooler_output"] = torch.from_numpy(ov_outputs[1]) if self.hidden_states_output_names and "last_hidden_state" not in model_outputs: model_outputs["last_hidden_state"] = torch.from_numpy(ov_outputs[self.hidden_states_output_names[-1]]) - if ( - self.hidden_states_output_names - and output_hidden_states - or getattr(self.config, "output_hidden_states", False) - ): + if self.hidden_states_output_names and output_hidden_states or getattr(self.config, "output_hidden_states", False): hidden_states = [torch.from_numpy(ov_outputs[out_name]) for out_name in self.hidden_states_output_names] model_outputs["hidden_states"] = hidden_states @@ -1357,9 +1274,7 @@ def forward( model_outputs["latents"] = model_outputs.pop("latent_sample") if "latent_parameters" in model_outputs: - model_outputs["latent_dist"] = DiagonalGaussianDistribution( - parameters=model_outputs.pop("latent_parameters") - ) + model_outputs["latent_dist"] = DiagonalGaussianDistribution(parameters=model_outputs.pop("latent_parameters")) if return_dict: return model_outputs @@ -1476,9 +1391,7 @@ class OVStableDiffusionPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMix auto_model_class = StableDiffusionPipeline -class OVStableDiffusionImg2ImgPipeline( - OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionImg2ImgPipeline -): +class OVStableDiffusionImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionImg2ImgPipeline): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_img2img#diffusers.StableDiffusionImg2ImgPipeline). 
""" @@ -1488,9 +1401,7 @@ class OVStableDiffusionImg2ImgPipeline( auto_model_class = StableDiffusionImg2ImgPipeline -class OVStableDiffusionInpaintPipeline( - OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionInpaintPipeline -): +class OVStableDiffusionInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionInpaintPipeline): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_inpaint#diffusers.StableDiffusionInpaintPipeline). """ @@ -1523,9 +1434,7 @@ def _get_add_time_ids( return add_time_ids -class OVStableDiffusionXLImg2ImgPipeline( - OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionXLImg2ImgPipeline -): +class OVStableDiffusionXLImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionXLImg2ImgPipeline): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLImg2ImgPipeline). """ @@ -1549,9 +1458,7 @@ def _get_add_time_ids( ): if self.config.requires_aesthetics_score: add_time_ids = list(original_size + crops_coords_top_left + (aesthetic_score,)) - add_neg_time_ids = list( - negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,) - ) + add_neg_time_ids = list(negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,)) else: add_time_ids = list(original_size + crops_coords_top_left + target_size) add_neg_time_ids = list(negative_original_size + crops_coords_top_left + negative_target_size) @@ -1562,9 +1469,7 @@ def _get_add_time_ids( return add_time_ids, add_neg_time_ids -class OVStableDiffusionXLInpaintPipeline( - OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionXLInpaintPipeline -): +class OVStableDiffusionXLInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionXLInpaintPipeline): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLInpaintPipeline). """ @@ -1588,9 +1493,7 @@ def _get_add_time_ids( ): if self.config.requires_aesthetics_score: add_time_ids = list(original_size + crops_coords_top_left + (aesthetic_score,)) - add_neg_time_ids = list( - negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,) - ) + add_neg_time_ids = list(negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,)) else: add_time_ids = list(original_size + crops_coords_top_left + target_size) add_neg_time_ids = list(negative_original_size + crops_coords_top_left + negative_target_size) @@ -1601,9 +1504,7 @@ def _get_add_time_ids( return add_time_ids, add_neg_time_ids -class OVLatentConsistencyModelPipeline( - OVDiffusionPipeline, OVTextualInversionLoaderMixin, LatentConsistencyModelPipeline -): +class OVLatentConsistencyModelPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, LatentConsistencyModelPipeline): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). 
""" @@ -1613,9 +1514,7 @@ class OVLatentConsistencyModelPipeline( auto_model_class = LatentConsistencyModelPipeline -class OVLatentConsistencyModelImg2ImgPipeline( - OVDiffusionPipeline, OVTextualInversionLoaderMixin, LatentConsistencyModelImg2ImgPipeline -): +class OVLatentConsistencyModelImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, LatentConsistencyModelImg2ImgPipeline): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency_img2img#diffusers.LatentConsistencyModelImg2ImgPipeline). """ @@ -1631,17 +1530,13 @@ class OVStableDiffusion3Pipeline(OVDiffusionPipeline, OVTextualInversionLoaderMi auto_model_class = StableDiffusion3Pipeline -class OVStableDiffusion3Img2ImgPipeline( - OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusion3Img2ImgPipeline -): +class OVStableDiffusion3Img2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusion3Img2ImgPipeline): main_input_name = "image" export_feature = "image-to-image" auto_model_class = StableDiffusion3Img2ImgPipeline -class OVStableDiffusion3InpaintPipeline( - OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusion3InpaintPipeline -): +class OVStableDiffusion3InpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusion3InpaintPipeline): main_input_name = "image" export_feature = "inpainting" auto_model_class = StableDiffusion3InpaintPipeline @@ -1664,16 +1559,19 @@ class OVFluxInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, export_feature = "inpainting" auto_model_class = FluxInpaintPipeline + class OVFluxFillPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxFillPipeline): main_input_name = "image" export_feature = "inpainting" auto_model_class = FluxFillPipeline + class OVFluxKontextPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxKontextPipeline): main_input_name = "image" export_feature = "image-to-image" auto_model_class = FluxKontextPipeline + class OVSanaPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, SanaPipeline): main_input_name = "prompt" export_feature = "text-to-image" @@ -1706,10 +1604,7 @@ class OVLTXPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, LTXPipel def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = True): for ov_pipeline_class in SUPPORTED_OV_PIPELINES: - if ( - ov_pipeline_class.__name__ == pipeline_class_name - or ov_pipeline_class.auto_model_class.__name__ == pipeline_class_name - ): + if ov_pipeline_class.__name__ == pipeline_class_name or ov_pipeline_class.auto_model_class.__name__ == pipeline_class_name: return ov_pipeline_class if throw_error_if_not_exist: @@ -1776,8 +1671,8 @@ def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tru if is_diffusers_version(">=", "0.33.0"): SUPPORTED_OV_PIPELINES.append(OVSanaSprintPipeline) OV_TEXT2IMAGE_PIPELINES_MAPPING["sana-sprint"] = OVSanaSprintPipeline - - + + if is_diffusers_version(">", "0.34.0"): SUPPORTED_OV_PIPELINES.extend([OVFluxKontextPipeline]) OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = OVFluxKontextPipeline @@ -1794,10 +1689,7 @@ def _get_task_ov_class(mapping, pipeline_class_name): def _get_model_name(pipeline_class_name): for ov_pipelines_mapping in SUPPORTED_OV_PIPELINES_MAPPINGS: for model_name, ov_pipeline_class in ov_pipelines_mapping.items(): - if ( - ov_pipeline_class.__name__ == 
pipeline_class_name - or ov_pipeline_class.auto_model_class.__name__ == pipeline_class_name - ): + if ov_pipeline_class.__name__ == pipeline_class_name or ov_pipeline_class.auto_model_class.__name__ == pipeline_class_name: return model_name model_name = _get_model_name(pipeline_class_name) diff --git a/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py b/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py index 4845732922..e1b2afe3df 100644 --- a/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py +++ b/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py @@ -255,7 +255,8 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["openvino", "diffusers"]) - + + class OVSanaPipeline(metaclass=DummyObject): _backends = ["openvino", "diffusers"] From 024ac2dc675c2b17e60fdddc43d60df990b224e7 Mon Sep 17 00:00:00 2001 From: ethan Date: Sun, 6 Jul 2025 20:31:05 -0700 Subject: [PATCH 03/15] add test case --- optimum/intel/openvino/modeling_diffusion.py | 2 +- tests/openvino/utils_tests.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index 907cd4b158..a6f2aafb96 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -1675,7 +1675,7 @@ def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tru if is_diffusers_version(">", "0.34.0"): SUPPORTED_OV_PIPELINES.extend([OVFluxKontextPipeline]) - OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = OVFluxKontextPipeline + OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux-kontext"] = OVFluxKontextPipeline SUPPORTED_OV_PIPELINES_MAPPINGS = [ OV_TEXT2IMAGE_PIPELINES_MAPPING, diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index fae7f08269..7fcc5e6714 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -76,6 +76,7 @@ "flaubert": "hf-internal-testing/tiny-random-flaubert", "flux": "katuni4ka/tiny-random-flux", "flux-fill": "katuni4ka/tiny-random-flux-fill", + "flux-kontext": "snake7gun/flux-kontext-random", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel", From c365139de486e5303279237a5c711babcc0a3d16 Mon Sep 17 00:00:00 2001 From: ethan Date: Sun, 6 Jul 2025 20:55:39 -0700 Subject: [PATCH 04/15] reformat --- optimum/intel/__init__.py | 8 +- optimum/intel/openvino/modeling_diffusion.py | 197 +++++++++++++++---- 2 files changed, 160 insertions(+), 45 deletions(-) diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py index 5bbf292f3f..cc08e3b09b 100644 --- a/optimum/intel/__init__.py +++ b/optimum/intel/__init__.py @@ -167,7 +167,9 @@ except OptionalDependencyNotAvailable: from .utils import dummy_openvino_objects - _import_structure["utils.dummy_openvino_objects"] = [name for name in dir(dummy_openvino_objects) if not name.startswith("_")] + _import_structure["utils.dummy_openvino_objects"] = [ + name for name in dir(dummy_openvino_objects) if not name.startswith("_") + ] else: _import_structure["openvino"].extend( [ @@ -204,7 +206,9 @@ except OptionalDependencyNotAvailable: from .utils import dummy_neural_compressor_objects - _import_structure["utils.dummy_neural_compressor_objects"] = [name for name in dir(dummy_neural_compressor_objects) if not name.startswith("_")] + 
_import_structure["utils.dummy_neural_compressor_objects"] = [ + name for name in dir(dummy_neural_compressor_objects) if not name.startswith("_") + ] else: _import_structure["neural_compressor"] = [ "INCConfig", diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index a6f2aafb96..04a51afd65 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -201,15 +201,35 @@ def __init__( ) self.unet = OVModelUnet(unet, self, DIFFUSION_MODEL_UNET_SUBFOLDER) if unet is not None else None - self.transformer = OVModelTransformer(transformer, self, DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER) if transformer is not None else None + self.transformer = ( + OVModelTransformer(transformer, self, DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER) + if transformer is not None + else None + ) if unet is None and transformer is None: raise ValueError("`unet` or `transformer` model should be provided for pipeline work") self.vae_decoder = OVModelVaeDecoder(vae_decoder, self, DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER) - self.vae_encoder = OVModelVaeEncoder(vae_encoder, self, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER) if vae_encoder is not None else None - self.text_encoder = OVModelTextEncoder(text_encoder, self, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER) if text_encoder is not None else None - self.text_encoder_2 = OVModelTextEncoder(text_encoder_2, self, DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER) if text_encoder_2 is not None else None - self.text_encoder_3 = OVModelTextEncoder(text_encoder_3, self, DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER) if text_encoder_3 is not None else None + self.vae_encoder = ( + OVModelVaeEncoder(vae_encoder, self, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER) + if vae_encoder is not None + else None + ) + self.text_encoder = ( + OVModelTextEncoder(text_encoder, self, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER) + if text_encoder is not None + else None + ) + self.text_encoder_2 = ( + OVModelTextEncoder(text_encoder_2, self, DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER) + if text_encoder_2 is not None + else None + ) + self.text_encoder_3 = ( + OVModelTextEncoder(text_encoder_3, self, DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER) + if text_encoder_3 is not None + else None + ) # We wrap the VAE Decoder & Encoder in a single object to simulate diffusers API self.vae = OVModelVae(decoder=self.vae_decoder, encoder=self.vae_encoder) @@ -290,7 +310,9 @@ def _save_pretrained(self, save_directory: Union[str, Path]): The directory where to save the model files """ if self._compile_only: - raise ValueError("`save_pretrained()` is not supported with `compile_only` mode, please initialize model without this option") + raise ValueError( + "`save_pretrained()` is not supported with `compile_only` mode, please initialize model without this option" + ) save_directory = Path(save_directory) @@ -308,7 +330,11 @@ def _save_pretrained(self, save_directory: Union[str, Path]): dst_path = save_path / OV_XML_FILE_NAME dst_path.parent.mkdir(parents=True, exist_ok=True) openvino.save_model(model.model, dst_path, compress_to_fp16=False) - model_dir = self.model_save_dir if not isinstance(self.model_save_dir, TemporaryDirectory) else self.model_save_dir.name + model_dir = ( + self.model_save_dir + if not isinstance(self.model_save_dir, TemporaryDirectory) + else self.model_save_dir.name + ) config_path = Path(model_dir) / save_path.name / CONFIG_NAME if config_path.is_file(): config_save_path = save_path / CONFIG_NAME @@ -339,7 +365,11 @@ def 
_save_config(self, save_directory): Saves a model configuration into a directory, so that it can be re-loaded using the [`from_pretrained`] class method. """ - model_dir = self.model_save_dir if not isinstance(self.model_save_dir, TemporaryDirectory) else self.model_save_dir.name + model_dir = ( + self.model_save_dir + if not isinstance(self.model_save_dir, TemporaryDirectory) + else self.model_save_dir.name + ) save_dir = Path(save_directory) original_config = Path(model_dir) / self.config_name if original_config.exists(): @@ -497,7 +527,11 @@ def _from_pretrained( ov_config = kwargs.get("ov_config", {}) device = kwargs.get("device", "CPU") vae_ov_conifg = {**ov_config} - if "GPU" in device.upper() and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg and is_openvino_version("<=", "2025.0"): + if ( + "GPU" in device.upper() + and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg + and is_openvino_version("<=", "2025.0") + ): vae_model_path = models["vae_decoder"] required_upcast = check_scale_available(vae_model_path) if required_upcast: @@ -638,10 +672,15 @@ def to(self, *args, device: Optional[str] = None, dtype: Optional[torch.dtype] = self._device = device.upper() self.clear_requests() elif device is not None: - raise ValueError("The `device` argument should be a string representing the device on which the model should be loaded.") + raise ValueError( + "The `device` argument should be a string representing the device on which the model should be loaded." + ) if dtype is not None and dtype != self.dtype: - raise NotImplementedError(f"Cannot change the dtype of the model from {self.dtype} to {dtype}. " f"Please export the model with the desired dtype.") + raise NotImplementedError( + f"Cannot change the dtype of the model from {self.dtype} to {dtype}. " + f"Please export the model with the desired dtype." + ) return self @@ -651,7 +690,9 @@ def height(self) -> int: height = model.inputs[0].get_partial_shape()[-2] if height.is_dynamic: return -1 - return height.get_length() * (self.vae_scale_factor if hasattr(self, "vae_scale_factor") else self.vae_spatial_compression_ratio) + return height.get_length() * ( + self.vae_scale_factor if hasattr(self, "vae_scale_factor") else self.vae_spatial_compression_ratio + ) @property def width(self) -> int: @@ -659,7 +700,9 @@ def width(self) -> int: width = model.inputs[0].get_partial_shape()[-1] if width.is_dynamic: return -1 - return width.get_length() * (self.vae_scale_factor if hasattr(self, "vae_scale_factor") else self.vae_spatial_compression_ratio) + return width.get_length() * ( + self.vae_scale_factor if hasattr(self, "vae_scale_factor") else self.vae_spatial_compression_ratio + ) @property def batch_size(self) -> int: @@ -755,7 +798,9 @@ def _reshape_transformer( elif inputs.get_any_name() == "hidden_states": in_channels = self.transformer.config.get("in_channels", None) if in_channels is None: - in_channels = shapes[inputs][1] if inputs.get_partial_shape().rank.get_length() == 4 else shapes[inputs][2] + in_channels = ( + shapes[inputs][1] if inputs.get_partial_shape().rank.get_length() == 4 else shapes[inputs][2] + ) if in_channels.is_dynamic: logger.warning( "Could not identify `in_channels` from the unet configuration, to statically reshape the unet please provide a configuration." 
@@ -769,7 +814,11 @@ def _reshape_transformer( elif inputs.get_any_name() == "pooled_projections": shapes[inputs] = [batch_size, self.transformer.config["pooled_projection_dim"]] elif inputs.get_any_name() == "img_ids": - shapes[inputs] = [batch_size, packed_height_width, 3] if is_diffusers_version("<", "0.31.0") else [packed_height_width, 3] + shapes[inputs] = ( + [batch_size, packed_height_width, 3] + if is_diffusers_version("<", "0.31.0") + else [packed_height_width, 3] + ) elif inputs.get_any_name() == "txt_ids": shapes[inputs] = [batch_size, -1, 3] if is_diffusers_version("<", "0.31.0") else [-1, 3] elif inputs.get_any_name() in ["height", "width", "num_frames", "rope_interpolation_scale"]: @@ -843,7 +892,9 @@ def _reshape_vae_decoder( def reshape(self, batch_size: int, height: int, width: int, num_images_per_prompt: int = -1, num_frames: int = -1): if self._compile_only: - raise ValueError("`reshape()` is not supported with `compile_only` mode, please initialize model without this option") + raise ValueError( + "`reshape()` is not supported with `compile_only` mode, please initialize model without this option" + ) self.is_dynamic = -1 in {batch_size, height, width, num_images_per_prompt} @@ -854,11 +905,15 @@ def reshape(self, batch_size: int, height: int, width: int, num_images_per_promp tokenizer_max_len = -1 else: tokenizer_max_len = ( - getattr(self.tokenizer, "model_max_length", -1) if self.tokenizer is not None else getattr(self.tokenizer_2, "model_max_length", -1) + getattr(self.tokenizer, "model_max_length", -1) + if self.tokenizer is not None + else getattr(self.tokenizer_2, "model_max_length", -1) ) if self.unet is not None: - self.unet.model = self._reshape_unet(self.unet.model, batch_size, height, width, num_images_per_prompt, tokenizer_max_len) + self.unet.model = self._reshape_unet( + self.unet.model, batch_size, height, width, num_images_per_prompt, tokenizer_max_len + ) if self.transformer is not None: self.transformer.model = self._reshape_transformer( self.transformer.model, @@ -869,10 +924,14 @@ def reshape(self, batch_size: int, height: int, width: int, num_images_per_promp tokenizer_max_len, num_frames=num_frames, ) - self.vae_decoder.model = self._reshape_vae_decoder(self.vae_decoder.model, height, width, num_images_per_prompt, num_frames=num_frames) + self.vae_decoder.model = self._reshape_vae_decoder( + self.vae_decoder.model, height, width, num_images_per_prompt, num_frames=num_frames + ) if self.vae_encoder is not None: - self.vae_encoder.model = self._reshape_vae_encoder(self.vae_encoder.model, batch_size, height, width, num_frames=num_frames) + self.vae_encoder.model = self._reshape_vae_encoder( + self.vae_encoder.model, batch_size, height, width, num_frames=num_frames + ) if self.text_encoder is not None: self.text_encoder.model = self._reshape_text_encoder( @@ -881,13 +940,16 @@ def reshape(self, batch_size: int, height: int, width: int, num_images_per_promp batch_size, ( getattr(self.tokenizer, "model_max_length", -1) - if "Gemma" not in self.tokenizer.__class__.__name__ and not self.__class__.__name__.startswith("OVLTX") + if "Gemma" not in self.tokenizer.__class__.__name__ + and not self.__class__.__name__.startswith("OVLTX") else -1 ), ) if self.text_encoder_2 is not None: - self.text_encoder_2.model = self._reshape_text_encoder(self.text_encoder_2.model, batch_size, getattr(self.tokenizer_2, "model_max_length", -1)) + self.text_encoder_2.model = self._reshape_text_encoder( + self.text_encoder_2.model, batch_size, getattr(self.tokenizer_2, 
"model_max_length", -1) + ) if self.text_encoder_3 is not None: self.text_encoder_3.model = self._reshape_text_encoder(self.text_encoder_3.model, batch_size, -1) @@ -900,7 +962,9 @@ def half(self): Converts all the model weights to FP16 for more efficient inference on GPU. """ if self._compile_only: - raise ValueError("`half()` is not supported with `compile_only` mode, please initialize model without this option") + raise ValueError( + "`half()` is not supported with `compile_only` mode, please initialize model without this option" + ) for submodel in self.ov_submodels.values(): compress_model_transformation(submodel) @@ -911,7 +975,9 @@ def half(self): def clear_requests(self): if self._compile_only: - raise ValueError("`clear_requests()` is not supported with `compile_only` mode, please initialize model without this option") + raise ValueError( + "`clear_requests()` is not supported with `compile_only` mode, please initialize model without this option" + ) for submodel_name in self._ov_submodel_names: getattr(self, submodel_name).request = None @@ -998,7 +1064,9 @@ def __call__(self, *args, **kwargs): # Disable this behavior for static shape pipeline if self.auto_model_class.__name__.startswith("Sana") and shapes_overridden: sig_resolution_bining_idx = ( - list(sig.parameters).index("use_resolution_binning") if "use_resolution_binning" in sig.parameters else len(sig.parameters) + list(sig.parameters).index("use_resolution_binning") + if "use_resolution_binning" in sig.parameters + else len(sig.parameters) ) if len(args) > sig_resolution_bining_idx: args[sig_resolution_bining_idx] = False @@ -1052,7 +1120,11 @@ def dtype(self) -> torch.dtype: def _compile(self): if self.request is None: - if "CACHE_DIR" not in self.ov_config.keys() and not str(self.model_save_dir).startswith(gettempdir()) and "GPU" in self._device: + if ( + "CACHE_DIR" not in self.ov_config.keys() + and not str(self.model_save_dir).startswith(gettempdir()) + and "GPU" in self._device + ): self.ov_config["CACHE_DIR"] = os.path.join(self.model_save_dir, "model_cache") logger.info(f"Compiling the {self.model_name} to {self._device} ...") @@ -1072,10 +1144,15 @@ def to(self, *args, device: Optional[str] = None, dtype: Optional[torch.dtype] = self._device = device.upper() self.request = None elif device is not None: - raise ValueError("The `device` argument should be a string representing the device on which the model should be loaded.") + raise ValueError( + "The `device` argument should be a string representing the device on which the model should be loaded." + ) if dtype is not None and dtype != self.dtype: - raise NotImplementedError(f"Cannot change the dtype of the model from {self.dtype} to {dtype}. " f"Please export the model with the desired dtype.") + raise NotImplementedError( + f"Cannot change the dtype of the model from {self.dtype} to {dtype}. " + f"Please export the model with the desired dtype." 
+ ) return self @@ -1093,7 +1170,9 @@ def modules(self): class OVModelTextEncoder(OVPipelinePart): def __init__(self, model: openvino.Model, parent_pipeline: OVDiffusionPipeline, model_name: str = ""): super().__init__(model, parent_pipeline, model_name) - self.hidden_states_output_names = [name for out in self.model.outputs for name in out.names if name.startswith("hidden_states")] + self.hidden_states_output_names = [ + name for out in self.model.outputs for name in out.names if name.startswith("hidden_states") + ] self.input_names = [inp.get_any_name() for inp in self.model.inputs] def forward( @@ -1117,7 +1196,11 @@ def forward( model_outputs["pooler_output"] = torch.from_numpy(ov_outputs[1]) if self.hidden_states_output_names and "last_hidden_state" not in model_outputs: model_outputs["last_hidden_state"] = torch.from_numpy(ov_outputs[self.hidden_states_output_names[-1]]) - if self.hidden_states_output_names and output_hidden_states or getattr(self.config, "output_hidden_states", False): + if ( + self.hidden_states_output_names + and output_hidden_states + or getattr(self.config, "output_hidden_states", False) + ): hidden_states = [torch.from_numpy(ov_outputs[out_name]) for out_name in self.hidden_states_output_names] model_outputs["hidden_states"] = hidden_states @@ -1274,7 +1357,9 @@ def forward( model_outputs["latents"] = model_outputs.pop("latent_sample") if "latent_parameters" in model_outputs: - model_outputs["latent_dist"] = DiagonalGaussianDistribution(parameters=model_outputs.pop("latent_parameters")) + model_outputs["latent_dist"] = DiagonalGaussianDistribution( + parameters=model_outputs.pop("latent_parameters") + ) if return_dict: return model_outputs @@ -1391,7 +1476,9 @@ class OVStableDiffusionPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMix auto_model_class = StableDiffusionPipeline -class OVStableDiffusionImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionImg2ImgPipeline): +class OVStableDiffusionImg2ImgPipeline( + OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionImg2ImgPipeline +): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_img2img#diffusers.StableDiffusionImg2ImgPipeline). """ @@ -1401,7 +1488,9 @@ class OVStableDiffusionImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLo auto_model_class = StableDiffusionImg2ImgPipeline -class OVStableDiffusionInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionInpaintPipeline): +class OVStableDiffusionInpaintPipeline( + OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionInpaintPipeline +): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_inpaint#diffusers.StableDiffusionInpaintPipeline). 
""" @@ -1434,7 +1523,9 @@ def _get_add_time_ids( return add_time_ids -class OVStableDiffusionXLImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionXLImg2ImgPipeline): +class OVStableDiffusionXLImg2ImgPipeline( + OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionXLImg2ImgPipeline +): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLImg2ImgPipeline). """ @@ -1458,7 +1549,9 @@ def _get_add_time_ids( ): if self.config.requires_aesthetics_score: add_time_ids = list(original_size + crops_coords_top_left + (aesthetic_score,)) - add_neg_time_ids = list(negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,)) + add_neg_time_ids = list( + negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,) + ) else: add_time_ids = list(original_size + crops_coords_top_left + target_size) add_neg_time_ids = list(negative_original_size + crops_coords_top_left + negative_target_size) @@ -1469,7 +1562,9 @@ def _get_add_time_ids( return add_time_ids, add_neg_time_ids -class OVStableDiffusionXLInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionXLInpaintPipeline): +class OVStableDiffusionXLInpaintPipeline( + OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusionXLInpaintPipeline +): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusionXLInpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#diffusers.StableDiffusionXLInpaintPipeline). """ @@ -1493,7 +1588,9 @@ def _get_add_time_ids( ): if self.config.requires_aesthetics_score: add_time_ids = list(original_size + crops_coords_top_left + (aesthetic_score,)) - add_neg_time_ids = list(negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,)) + add_neg_time_ids = list( + negative_original_size + negative_crops_coords_top_left + (negative_aesthetic_score,) + ) else: add_time_ids = list(original_size + crops_coords_top_left + target_size) add_neg_time_ids = list(negative_original_size + crops_coords_top_left + negative_target_size) @@ -1504,7 +1601,9 @@ def _get_add_time_ids( return add_time_ids, add_neg_time_ids -class OVLatentConsistencyModelPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, LatentConsistencyModelPipeline): +class OVLatentConsistencyModelPipeline( + OVDiffusionPipeline, OVTextualInversionLoaderMixin, LatentConsistencyModelPipeline +): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency#diffusers.LatentConsistencyModelPipeline). 
""" @@ -1514,7 +1613,9 @@ class OVLatentConsistencyModelPipeline(OVDiffusionPipeline, OVTextualInversionLo auto_model_class = LatentConsistencyModelPipeline -class OVLatentConsistencyModelImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, LatentConsistencyModelImg2ImgPipeline): +class OVLatentConsistencyModelImg2ImgPipeline( + OVDiffusionPipeline, OVTextualInversionLoaderMixin, LatentConsistencyModelImg2ImgPipeline +): """ OpenVINO-powered stable diffusion pipeline corresponding to [diffusers.LatentConsistencyModelImg2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/latent_consistency_img2img#diffusers.LatentConsistencyModelImg2ImgPipeline). """ @@ -1530,13 +1631,17 @@ class OVStableDiffusion3Pipeline(OVDiffusionPipeline, OVTextualInversionLoaderMi auto_model_class = StableDiffusion3Pipeline -class OVStableDiffusion3Img2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusion3Img2ImgPipeline): +class OVStableDiffusion3Img2ImgPipeline( + OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusion3Img2ImgPipeline +): main_input_name = "image" export_feature = "image-to-image" auto_model_class = StableDiffusion3Img2ImgPipeline -class OVStableDiffusion3InpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusion3InpaintPipeline): +class OVStableDiffusion3InpaintPipeline( + OVDiffusionPipeline, OVTextualInversionLoaderMixin, StableDiffusion3InpaintPipeline +): main_input_name = "image" export_feature = "inpainting" auto_model_class = StableDiffusion3InpaintPipeline @@ -1604,7 +1709,10 @@ class OVLTXPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, LTXPipel def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = True): for ov_pipeline_class in SUPPORTED_OV_PIPELINES: - if ov_pipeline_class.__name__ == pipeline_class_name or ov_pipeline_class.auto_model_class.__name__ == pipeline_class_name: + if ( + ov_pipeline_class.__name__ == pipeline_class_name + or ov_pipeline_class.auto_model_class.__name__ == pipeline_class_name + ): return ov_pipeline_class if throw_error_if_not_exist: @@ -1689,7 +1797,10 @@ def _get_task_ov_class(mapping, pipeline_class_name): def _get_model_name(pipeline_class_name): for ov_pipelines_mapping in SUPPORTED_OV_PIPELINES_MAPPINGS: for model_name, ov_pipeline_class in ov_pipelines_mapping.items(): - if ov_pipeline_class.__name__ == pipeline_class_name or ov_pipeline_class.auto_model_class.__name__ == pipeline_class_name: + if ( + ov_pipeline_class.__name__ == pipeline_class_name + or ov_pipeline_class.auto_model_class.__name__ == pipeline_class_name + ): return model_name model_name = _get_model_name(pipeline_class_name) From 051a8da93fb09cf79e85ba5f4e98aeacdb521c10 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 7 Jul 2025 08:43:12 +0000 Subject: [PATCH 05/15] Apply style fixes --- optimum/intel/openvino/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py index f9a6f39c52..5565905503 100644 --- a/optimum/intel/openvino/__init__.py +++ b/optimum/intel/openvino/__init__.py @@ -90,8 +90,8 @@ OVFluxFillPipeline, OVFluxImg2ImgPipeline, OVFluxInpaintPipeline, - OVFluxPipeline, OVFluxKontextPipeline, + OVFluxPipeline, OVLatentConsistencyModelImg2ImgPipeline, OVLatentConsistencyModelPipeline, OVLTXPipeline, From c49252f08d517b514041b4d8a9724f691cec18f2 Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 7 Jul 2025 19:39:04 -0700 Subject: 
[PATCH 06/15] add diffusion and export test --- tests/openvino/test_diffusion.py | 5 +++-- tests/openvino/test_exporters_cli.py | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py index 5b1d3af656..8859c8f7e4 100644 --- a/tests/openvino/test_diffusion.py +++ b/tests/openvino/test_diffusion.py @@ -483,6 +483,7 @@ class OVPipelineForImage2ImageTest(unittest.TestCase): if is_transformers_version(">=", "4.40.0"): SUPPORTED_ARCHITECTURES.append("stable-diffusion-3") SUPPORTED_ARCHITECTURES.append("flux") + SUPPORTED_ARCHITECTURES.append("flux-kontext") AUTOMODEL_CLASS = AutoPipelineForImage2Image OVMODEL_CLASS = OVPipelineForImage2Image @@ -496,7 +497,7 @@ def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_ height=height, width=width, batch_size=batch_size, channel=channel, input_type=input_type ) - if model_type in ["flux", "stable-diffusion-3"]: + if model_type in ["flux", "stable-diffusion-3", "flux-kontext"]: inputs["height"] = height inputs["width"] = width @@ -583,7 +584,7 @@ def test_shape(self, model_arch: str): elif output_type == "pt": self.assertEqual(outputs.shape, (batch_size, 3, height, width)) else: - if model_arch != "flux": + if model_arch != "flux" and model_arch != "flux-kontext": out_channels = ( pipeline.unet.config.out_channels if pipeline.unet is not None diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index 83b5b1e80d..6b00e1eedd 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -31,6 +31,7 @@ from optimum.exporters.openvino.utils import COMPLEX_CHAT_TEMPLATES from optimum.intel import ( # noqa OVFluxFillPipeline, + OVFluxKontextPipeline, OVFluxPipeline, OVLatentConsistencyModelPipeline, OVLTXPipeline, @@ -100,6 +101,7 @@ class OVCLIExportTestCase(unittest.TestCase): ("inpainting", "flux-fill"), ("text-to-image", "sana"), ("text-to-video", "ltx-video"), + ("image-to-image", "flux-kontext"), ] ) EXPECTED_NUMBER_OF_TOKENIZER_MODELS = { @@ -117,6 +119,7 @@ class OVCLIExportTestCase(unittest.TestCase): "stable-diffusion-3": 6 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 2, "flux": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, "flux-fill": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, + "flux-kontext": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, "llava": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, "sana": 2 if is_tokenizers_version("<", "0.20.0") or is_openvino_version(">=", "2024.5") else 0, "ltx-video": 2 if is_tokenizers_version("<", "0.20.0") or is_openvino_version(">=", "2024.5") else 0, From 8459565812fa69be50982d0931f5db4ad89d3e40 Mon Sep 17 00:00:00 2001 From: ethan Date: Tue, 8 Jul 2025 18:43:11 -0700 Subject: [PATCH 07/15] rebase the export cli test --- tests/openvino/test_diffusion.py | 4 +++- tests/openvino/test_exporters_cli.py | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py index 8859c8f7e4..63fc6b4803 100644 --- a/tests/openvino/test_diffusion.py +++ b/tests/openvino/test_diffusion.py @@ -24,6 +24,7 @@ AutoPipelineForInpainting, AutoPipelineForText2Image, DiffusionPipeline, + FluxKontextPipeline, ) from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker 
 from diffusers.utils import load_image
@@ -611,7 +612,8 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str):
         height, width, batch_size = 128, 128, 1
         inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_type=model_arch)
 
-        diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
+        auto_cls = self.AUTOMODEL_CLASS if "flux-kontext" not in model_arch else FluxKontextPipeline
+        diffusers_pipeline = auto_cls.from_pretrained(MODEL_NAMES[model_arch])
         ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
 
         for output_type in ["latent", "np", "pt"]:
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 6b00e1eedd..9e339f1d4f 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -101,7 +101,6 @@ class OVCLIExportTestCase(unittest.TestCase):
             ("inpainting", "flux-fill"),
             ("text-to-image", "sana"),
             ("text-to-video", "ltx-video"),
-            ("image-to-image", "flux-kontext"),
         ]
     )
     EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
@@ -119,7 +118,6 @@ class OVCLIExportTestCase(unittest.TestCase):
         "stable-diffusion-3": 6 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 2,
         "flux": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
         "flux-fill": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
-        "flux-kontext": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
         "llava": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
         "sana": 2 if is_tokenizers_version("<", "0.20.0") or is_openvino_version(">=", "2024.5") else 0,
         "ltx-video": 2 if is_tokenizers_version("<", "0.20.0") or is_openvino_version(">=", "2024.5") else 0,

From 2c7e1dee1f2ed916ee163faac98d46818bcb417f Mon Sep 17 00:00:00 2001
From: ethan
Date: Tue, 8 Jul 2025 22:48:16 -0700
Subject: [PATCH 08/15] add quantize test case

---
 tests/openvino/test_quantization.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 26c6283b85..f2933dcada 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -42,6 +42,7 @@
 from optimum.intel import (
     OVConfig,
     OVFluxPipeline,
+    OVFluxKontextPipeline,
     OVLatentConsistencyModelPipeline,
     OVModelForAudioClassification,
     OVModelForCausalLM,
@@ -1039,6 +1040,7 @@ class OVWeightCompressionTest(unittest.TestCase):
             (OVStableDiffusion3Pipeline, "stable-diffusion-3", 9, 65),
             (OVFluxPipeline, "flux", 7, 56),
             (OVSanaPipeline, "sana", 19, 53),
+            (OVFluxKontextPipeline, "flux-kontext", 19, 53),
         ]
     )

From c160e7fbb03d83112fb738721e90ec502e6c6d3a Mon Sep 17 00:00:00 2001
From: ethan
Date: Tue, 8 Jul 2025 23:26:15 -0700
Subject: [PATCH 09/15] delete cli export test

---
 tests/openvino/test_exporters_cli.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 9e339f1d4f..83b5b1e80d 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -31,7 +31,6 @@
 from optimum.exporters.openvino.utils import COMPLEX_CHAT_TEMPLATES
 from optimum.intel import (  # noqa
     OVFluxFillPipeline,
-    OVFluxKontextPipeline,
     OVFluxPipeline,
     OVLatentConsistencyModelPipeline,
     OVLTXPipeline,
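The churn in patches 06-09 comes down to one mechanism: everything Kontext-related must be gated on the installed diffusers version. For reference, the guarded-import convention the series relies on (already used for Sana and LTX, with the bound that patch 10 below settles on) looks roughly like this; a minimal restatement outside the diff context, assuming the version helper lives in optimum.intel.utils.import_utils:

# Optional-pipeline convention used in modeling_diffusion.py: fall back to
# `object` so the OV subclass definition still evaluates on older diffusers,
# and gate any registration on the same version check.
from optimum.intel.utils.import_utils import is_diffusers_version

if is_diffusers_version(">=", "0.35.0"):
    from diffusers import FluxKontextPipeline
else:
    # Keeps `class OVFluxKontextPipeline(..., FluxKontextPipeline)` importable
    # even when the real pipeline class is unavailable.
    FluxKontextPipeline = object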
From 1e37d3d9a2068ba82fe97b49dfe7adb9cc0b58db Mon Sep 17 00:00:00 2001
From: ethan
Date: Tue, 19 Aug 2025 21:13:34 -0700
Subject: [PATCH 10/15] support diffusers 0.35

---
 optimum/intel/openvino/modeling_diffusion.py | 2 +-
 tests/openvino/test_exporters_cli.py         | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 72752b2c65..2839e7fb7a 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -121,7 +121,7 @@
 else:
     SanaSprintPipeline = object
 
-if is_diffusers_version(">", "0.34.0"):
+if is_diffusers_version(">=", "0.35.0"):
     from diffusers import FluxKontextPipeline
 else:
     FluxKontextPipeline = object
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index eb51b99568..41064af092 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -33,6 +33,7 @@
 from optimum.intel import (  # noqa
     OVFluxFillPipeline,
     OVFluxPipeline,
+    OVFluxKontextPipeline,
     OVLatentConsistencyModelPipeline,
     OVLTXPipeline,
     OVModelForAudioClassification,
@@ -110,6 +111,7 @@ class OVCLIExportTestCase(unittest.TestCase):
         ("text-to-image", "stable-diffusion-3"),
         ("text-to-image", "flux"),
         ("inpainting", "flux-fill"),
+        ("image-to-image", "flux-kontext"),
         ("text-to-image", "sana"),
         ("text-to-video", "ltx-video"),
     ]
 )
@@ -137,6 +139,7 @@ class OVCLIExportTestCase(unittest.TestCase):
     "stable-diffusion-3": 6 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 2,
     "flux": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
     "flux-fill": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
+    "flux-kontext": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
     "llava": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
     "sana": 2 if is_tokenizers_version("<", "0.20.0") or is_openvino_version(">=", "2024.5") else 0,
     "ltx-video": 2 if is_tokenizers_version("<", "0.20.0") or is_openvino_version(">=", "2024.5") else 0,

From cb7df16372d34c661855ef4567d45aeaec19438e Mon Sep 17 00:00:00 2001
From: ethan
Date: Tue, 9 Sep 2025 21:05:01 -0700
Subject: [PATCH 11/15] update diffusion test with static shape

---
 optimum/intel/openvino/modeling_diffusion.py |  4 +--
 tests/openvino/test_diffusion.py             | 35 ++++++++++++++------
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 2839e7fb7a..d558184693 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -1791,8 +1791,8 @@ def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tru
     OV_TEXT2IMAGE_PIPELINES_MAPPING["sana-sprint"] = OVSanaSprintPipeline
 
 
-if is_diffusers_version(">", "0.34.0"):
-    SUPPORTED_OV_PIPELINES.extend([OVFluxKontextPipeline])
+if is_diffusers_version(">=", "0.35.0"):
+    SUPPORTED_OV_PIPELINES.append(OVFluxKontextPipeline)
     OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux-kontext"] = OVFluxKontextPipeline
 
 SUPPORTED_OV_PIPELINES_MAPPINGS = [
diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py
index d100cf8944..cbdb79a519 100644
--- a/tests/openvino/test_diffusion.py
+++ b/tests/openvino/test_diffusion.py
@@ -24,7 +24,6 @@
     AutoPipelineForInpainting,
     AutoPipelineForText2Image,
     DiffusionPipeline,
-    FluxKontextPipeline,
 )
 from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
 from diffusers.utils import load_image
@@ -485,7 +484,8 @@ class OVPipelineForImage2ImageTest(unittest.TestCase):
     if is_transformers_version(">=", "4.40.0"):
         SUPPORTED_ARCHITECTURES.append("stable-diffusion-3")
         SUPPORTED_ARCHITECTURES.append("flux")
-        SUPPORTED_ARCHITECTURES.append("flux-kontext")
+        if is_diffusers_version(">=", "0.35.0"):
+            SUPPORTED_ARCHITECTURES.append("flux-kontext")
 
     AUTOMODEL_CLASS = AutoPipelineForImage2Image
     OVMODEL_CLASS = OVPipelineForImage2Image
@@ -502,8 +502,9 @@ def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_
         if model_type in ["flux", "stable-diffusion-3", "flux-kontext"]:
             inputs["height"] = height
             inputs["width"] = width
-
-        inputs["strength"] = 0.75
+
+        if model_type != "flux-kontext":
+            inputs["strength"] = 0.75
 
         return inputs
 
@@ -535,7 +536,16 @@ def test_num_images_per_prompt(self, model_arch: str):
                     height=height, width=width, batch_size=batch_size, model_type=model_arch
                 )
                 outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images
-                self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3))
+                if model_arch != "flux-kontext":
+                    self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3))
+                else:
+                    # output shape is fixed: https://github.com/huggingface/diffusers/blob/v0.35.1/src/diffusers/pipelines/flux/pipeline_flux_kontext.py#L882
+                    if height == width:
+                        self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, 1024, 1024, 3))
+                    elif height > width:
+                        self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, 1448, 724, 3))
+                    else:
+                        self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, 724, 1448, 3))
 
     @parameterized.expand(["stable-diffusion", "stable-diffusion-xl", "latent-consistency"])
     @require_diffusers
@@ -568,8 +578,11 @@ def __call__(self, *args, **kwargs) -> None:
     @require_diffusers
     def test_shape(self, model_arch: str):
         pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
-
-        height, width, batch_size = 128, 64, 1
+        if model_arch != "flux-kontext":
+            height, width, batch_size = 128, 64, 1
+        else:
+            # output shape is fixed: https://github.com/huggingface/diffusers/blob/v0.35.1/src/diffusers/pipelines/flux/pipeline_flux_kontext.py#L882
+            height, width, batch_size = 1448, 724, 1
 
         for input_type in ["pil", "np", "pt"]:
             inputs = self.generate_inputs(
@@ -586,7 +599,7 @@ def test_shape(self, model_arch: str):
             elif output_type == "pt":
                 self.assertEqual(outputs.shape, (batch_size, 3, height, width))
             else:
-                if model_arch != "flux" and model_arch != "flux-kontext":
+                if not model_arch.startswith("flux"):
                     out_channels = (
                         pipeline.unet.config.out_channels
                         if pipeline.unet is not None
@@ -611,9 +624,9 @@ def test_shape(self, model_arch: str):
     @require_diffusers
     def test_compare_to_diffusers_pipeline(self, model_arch: str):
         height, width, batch_size = 128, 128, 1
-        inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_type=model_arch)
-
-        auto_cls = self.AUTOMODEL_CLASS if "flux-kontext" not in model_arch else FluxKontextPipeline
+        inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_type=model_arch)
+        auto_cls = self.AUTOMODEL_CLASS
+
         diffusers_pipeline = auto_cls.from_pretrained(MODEL_NAMES[model_arch])
         ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch])
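Taken together, patch 11 encodes the two ways Kontext differs from the other image-to-image pipelines under test: it accepts no `strength` argument, and it pins the output resolution itself rather than honoring the requested `height`/`width`. A minimal usage sketch of the exported pipeline; the model directory, input image, and prompt below are placeholders, not values from this series:

# Hypothetical usage sketch for OVFluxKontextPipeline; assumes diffusers >= 0.35.0
# and an already-exported OpenVINO model at the placeholder path.
from diffusers.utils import load_image

from optimum.intel import OVFluxKontextPipeline

pipe = OVFluxKontextPipeline.from_pretrained("path/to/flux-kontext-ov")
ref = load_image("input.png")  # reference image to edit

# No `strength` kwarg: Kontext conditions on the reference image directly, and
# the pipeline snaps the output to its preferred resolution (1024x1024 for a
# square input), which is what test_num_images_per_prompt asserts above.
result = pipe(prompt="turn the sky a warm sunset orange", image=ref)
result.images[0].save("edited.png")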
From 8b8e2c821c8533e25577ccf457ba31a038796bc9 Mon Sep 17 00:00:00 2001
From: ethan
Date: Wed, 10 Sep 2025 09:06:45 -0700
Subject: [PATCH 12/15] remove quantization test

---
 tests/openvino/test_exporters_cli.py | 8 +++++++-
 tests/openvino/test_quantization.py  | 2 --
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 41064af092..4abbfe5eb0 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -67,6 +67,7 @@
     is_openvino_version,
     is_tokenizers_version,
     is_transformers_version,
+    is_diffusers_version,
 )
 
 
@@ -111,11 +112,16 @@ class OVCLIExportTestCase(unittest.TestCase):
         ("text-to-image", "stable-diffusion-3"),
         ("text-to-image", "flux"),
         ("inpainting", "flux-fill"),
-        ("image-to-image", "flux-kontext"),
         ("text-to-image", "sana"),
         ("text-to-video", "ltx-video"),
     ]
 )
+if is_diffusers_version(">=", "0.35.0"):
+    SUPPORTED_ARCHITECTURES.extend(
+        [
+            ("image-to-image", "flux-kontext"),
+        ]
+    )
 
 if is_transformers_version(">=", "4.54"):
     SUPPORTED_ARCHITECTURES.extend(
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index c5bb46c34a..c9497e804e 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -42,7 +42,6 @@
 from optimum.intel import (
     OVConfig,
     OVFluxPipeline,
-    OVFluxKontextPipeline,
     OVLatentConsistencyModelPipeline,
     OVModelForAudioClassification,
     OVModelForCausalLM,
@@ -1134,7 +1133,6 @@ class OVWeightCompressionTest(unittest.TestCase):
         (OVStableDiffusion3Pipeline, "stable-diffusion-3", 9, 65),
         (OVFluxPipeline, "flux", 7, 56),
         (OVSanaPipeline, "sana", 19, 53),
-        (OVFluxKontextPipeline, "flux-kontext", 19, 53),
     ]
 )

From dd534af28d160e3e9723b77d9b95734cb2d354cb Mon Sep 17 00:00:00 2001
From: ethan
Date: Wed, 17 Sep 2025 19:55:20 -0700
Subject: [PATCH 13/15] add flux-kontext to _HEAD_TO_AUTOMODELS

---
 optimum/intel/openvino/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py
index ac60ce516c..9cfbd26dd7 100644
--- a/optimum/intel/openvino/utils.py
+++ b/optimum/intel/openvino/utils.py
@@ -139,6 +139,7 @@
     "sana": "OVSanaPipeline",
     "flux": "OVFluxPipeline",
     "flux-fill": "OVFluxFillPipeline",
+    "flux-kontext": "OVFluxKontextPipeline",
     "pix2struct": "OVModelForPix2Struct",
     "latent-consistency": "OVLatentConsistencyModelPipeline",
     "open_clip_text": "OVModelOpenCLIPText",

From bcd50709a067d25e2a05e7b7bfd43534fd4325f6 Mon Sep 17 00:00:00 2001
From: ethan
Date: Thu, 18 Sep 2025 08:33:39 -0700
Subject: [PATCH 14/15] update for kontext

---
 tests/openvino/utils_tests.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 8bf052910e..dfa99eb6d6 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -267,6 +267,13 @@
         "text_encoder": 64,
         "text_encoder_2": 64,
     },
+    "flux-kontext": {
+        "transformer": 56,
+        "vae_decoder": 28,
+        "vae_encoder": 24,
+        "text_encoder": 64,
+        "text_encoder_2": 64,
+    },
     "llava": {
         "lm_model": 30,
         "text_embeddings_model": 1,

From 4ae8de2ca6fdd39bbcda57264b77db3f5fe41338 Mon Sep 17 00:00:00 2001
From: ethan
Date: Sat, 27 Sep 2025 08:00:38 -0700
Subject: [PATCH 15/15] update int8 test case

---
 tests/openvino/utils_tests.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index dfa99eb6d6..1a1ad79552 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -268,11 +268,11 @@
         "text_encoder_2": 64,
     },
     "flux-kontext": {
"transformer": 56, - "vae_decoder": 28, - "vae_encoder": 24, + "transformer": 60, + "vae_decoder": 30, + "vae_encoder": 26, "text_encoder": 64, - "text_encoder_2": 64, + "text_encoder_2": 76, }, "llava": { "lm_model": 30,