diff --git a/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/cache_functions/force_scheduler.py b/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/cache_functions/force_scheduler.py
index 2dde27b4d..767c1269d 100644
--- a/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/cache_functions/force_scheduler.py
+++ b/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/cache_functions/force_scheduler.py
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import torch
+import paddle
 
 
 def force_scheduler(cache_dic, current):
@@ -22,10 +21,10 @@ def force_scheduler(cache_dic, current):
     else:
         # TokenCache
         linear_step_weight = 0.0
-    step_factor = torch.tensor(
+    step_factor = paddle.to_tensor(
         1 - linear_step_weight + 2 * linear_step_weight * current["step"] / current["num_steps"]
     )
-    threshold = torch.round(cache_dic["fresh_threshold"] / step_factor)
+    threshold = paddle.round(cache_dic["fresh_threshold"] / step_factor)
 
     # no force constrain for sensitive steps, cause the performance is good enough.
     # you may have a try.
diff --git a/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/forwards/__init__.py b/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/forwards/__init__.py
index f31751acb..376c0e02b 100644
--- a/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/forwards/__init__.py
+++ b/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/forwards/__init__.py
@@ -15,4 +15,3 @@
 from .double_transformer_forward import taylorseer_flux_double_block_forward
 from .flux_forward import taylorseer_flux_forward
 from .single_transformer_forward import taylorseer_flux_single_block_forward
-from .xfuser_flux_forward import taylorseer_xfuser_flux_forward
diff --git a/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/forwards/xfuser_flux_forward.py b/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/forwards/xfuser_flux_forward.py
deleted file mode 100644
index 364a795f3..000000000
--- a/ppdiffusers/examples/Fast-Diffusers/Training-Free/taylorseer/forwards/xfuser_flux_forward.py
+++ /dev/null
@@ -1,239 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Any, Dict, Optional, Union
-
-import paddle
-from cache_functions import cache_init, cal_type
-from xfuser.core.distributed.parallel_state import is_pipeline_first_stage
-
-from ppdiffusers.models import FluxTransformer2DModel
-from ppdiffusers.models.modeling_outputs import Transformer2DModelOutput
-from ppdiffusers.utils import (
-    USE_PEFT_BACKEND,
-    is_paddle_version,
-    logging,
-    scale_lora_layers,
-    unscale_lora_layers,
-)
-
-logger = logging.get_logger(__name__)
-
-
-def taylorseer_xfuser_flux_forward(
-    self: FluxTransformer2DModel,
-    hidden_states: paddle.Tensor,
-    encoder_hidden_states: paddle.Tensor = None,
-    pooled_projections: paddle.Tensor = None,
-    timestep: paddle.Tensor = None,
-    img_ids: paddle.Tensor = None,
-    txt_ids: paddle.Tensor = None,
-    guidance: paddle.Tensor = None,
-    joint_attention_kwargs: Optional[Dict[str, Any]] = None,
-    controlnet_block_samples=None,
-    controlnet_single_block_samples=None,
-    return_dict: bool = True,
-    controlnet_blocks_repeat: bool = False,
-) -> Union[paddle.Tensor, Transformer2DModelOutput]:
-    """
-    The [`FluxTransformer2DModel`] forward method.
-    Args:
-        hidden_states (`paddle.FloatTensor` of shape `(batch size, channel, height, width)`):
-            Input `hidden_states`.
-        encoder_hidden_states (`paddle.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`):
-            Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
-        pooled_projections (`paddle.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected
-            from the embeddings of input conditions.
-        timestep ( `paddle.LongTensor`):
-            Used to indicate denoising step.
-        block_controlnet_hidden_states: (`list` of `paddle.Tensor`):
-            A list of tensors that if specified are added to the residuals of transformer blocks.
-        joint_attention_kwargs (`dict`, *optional*):
-            A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
-            `self.processor` in
-            [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
-        return_dict (`bool`, *optional*, defaults to `True`):
-            Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
-            tuple.
-    Returns:
-        If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
-        `tuple` where the first element is the sample tensor.
-    """
-
-    if joint_attention_kwargs is None:
-        joint_attention_kwargs = {}
-    if joint_attention_kwargs.get("cache_dic", None) is None:
-        joint_attention_kwargs["cache_dic"], joint_attention_kwargs["current"] = cache_init(self)
-
-    cal_type(joint_attention_kwargs["cache_dic"], joint_attention_kwargs["current"])
-
-    if joint_attention_kwargs is not None:
-        joint_attention_kwargs = joint_attention_kwargs.copy()
-        lora_scale = joint_attention_kwargs.pop("scale", 1.0)
-    else:
-        lora_scale = 1.0
-
-    if USE_PEFT_BACKEND:
-        # weight the lora layers by setting `lora_scale` for each PEFT layer
-        scale_lora_layers(self, lora_scale)
-    else:
-        if joint_attention_kwargs is not None and joint_attention_kwargs.get("scale", None) is not None:
-            logger.warning(
-                "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
-            )
-
-    if is_pipeline_first_stage():
-        hidden_states = self.x_embedder(hidden_states)
-
-    timestep = timestep.to(hidden_states.dtype) * 1000
-    if guidance is not None:
-        guidance = guidance.to(hidden_states.dtype) * 1000
-    else:
-        guidance = None
-    temb = (
-        self.time_text_embed(timestep, pooled_projections)
-        if guidance is None
-        else self.time_text_embed(timestep, guidance, pooled_projections)
-    )
-    if is_pipeline_first_stage():
-        encoder_hidden_states = self.context_embedder(encoder_hidden_states)
-
-    if txt_ids.ndim == 3:
-        logger.warning(
-            "Passing `txt_ids` 3d paddle.Tensor is deprecated."
-            "Please remove the batch dimension and pass it as a 2d paddle Tensor"
-        )
-        txt_ids = txt_ids[0]
-    if img_ids.ndim == 3:
-        logger.warning(
-            "Passing `img_ids` 3d paddle.Tensor is deprecated."
-            "Please remove the batch dimension and pass it as a 2d paddle Tensor"
-        )
-        img_ids = img_ids[0]
-
-    ids = paddle.cat((txt_ids, img_ids), dim=0)
-    image_rotary_emb = self.pos_embed(ids)
-
-    if joint_attention_kwargs is not None and "ip_adapter_image_embeds" in joint_attention_kwargs:
-        ip_adapter_image_embeds = joint_attention_kwargs.pop("ip_adapter_image_embeds")
-        ip_hidden_states = self.encoder_hid_proj(ip_adapter_image_embeds)
-        joint_attention_kwargs.update({"ip_hidden_states": ip_hidden_states})
-
-    joint_attention_kwargs["current"]["stream"] = "double_stream"
-
-    for index_block, block in enumerate(self.transformer_blocks):
-
-        joint_attention_kwargs["current"]["layer"] = index_block
-
-        if self.training and self.gradient_checkpointing:
-
-            def create_custom_forward(module, return_dict=None):
-                def custom_forward(*inputs):
-                    if return_dict is not None:
-                        return module(*inputs, return_dict=return_dict)
-                    else:
-                        return module(*inputs)
-
-                return custom_forward
-
-            ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_paddle_version(">=", "1.11.0") else {}
-            encoder_hidden_states, hidden_states = paddle.utils.checkpoint.checkpoint(
-                create_custom_forward(block),
-                hidden_states,
-                encoder_hidden_states,
-                temb,
-                image_rotary_emb,
-                **ckpt_kwargs,
-            )
-
-        else:
-            encoder_hidden_states, hidden_states = block(
-                hidden_states=hidden_states,
-                encoder_hidden_states=encoder_hidden_states,
-                temb=temb,
-                image_rotary_emb=image_rotary_emb,
-                joint_attention_kwargs=joint_attention_kwargs,
-            )
-
-        # controlnet residual
-        # if controlnet_block_samples is not None:
-        #     interval_control = len(self.transformer_blocks) / len(controlnet_block_samples)
-        #     interval_control = int(np.ceil(interval_control))
-        #     hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control]
-
-    # if self.stage_info.after_flags["transformer_blocks"]:
-    hidden_states = paddle.cat([encoder_hidden_states, hidden_states], dim=1)
-
-    joint_attention_kwargs["current"]["stream"] = "single_stream"
-
-    for index_block, block in enumerate(self.single_transformer_blocks):
-
-        joint_attention_kwargs["current"]["layer"] = index_block
-
-        if self.training and self.gradient_checkpointing:
-
-            def create_custom_forward(module, return_dict=None):
-                def custom_forward(*inputs):
-                    if return_dict is not None:
-                        return module(*inputs, return_dict=return_dict)
-                    else:
-                        return module(*inputs)
-
-                return custom_forward
-
-            ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_paddle_version(">=", "1.11.0") else {}
-            hidden_states = paddle.utils.checkpoint.checkpoint(
-                create_custom_forward(block),
-                hidden_states,
-                temb,
-                image_rotary_emb,
-                **ckpt_kwargs,
-            )
-
-        else:
-            hidden_states = block(
-                hidden_states=hidden_states,
-                temb=temb,
-                image_rotary_emb=image_rotary_emb,
-                joint_attention_kwargs=joint_attention_kwargs,
-            )
-
-        # controlnet residual
-        # if controlnet_single_block_samples is not None:
-        #     interval_control = len(self.single_transformer_blocks) / len(controlnet_single_block_samples)
-        #     interval_control = int(np.ceil(interval_control))
-        #     hidden_states[:, encoder_hidden_states.shape[1] :, ...] = (
-        #         hidden_states[:, encoder_hidden_states.shape[1] :, ...]
-        #         + controlnet_single_block_samples[index_block // interval_control]
-        #     )
-
-    encoder_hidden_states = hidden_states[:, : encoder_hidden_states.shape[1], ...]
-    hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...]
-
-    if self.stage_info.after_flags["single_transformer_blocks"]:
-        hidden_states = self.norm_out(hidden_states, temb)
-        output = self.proj_out(hidden_states), None
-    else:
-        output = hidden_states, encoder_hidden_states
-
-    if USE_PEFT_BACKEND:
-        # remove `lora_scale` from each PEFT layer
-        unscale_lora_layers(self, lora_scale)
-
-    joint_attention_kwargs["current"]["step"] += 1
-
-    if not return_dict:
-        return (output,)
-
-    return Transformer2DModelOutput(sample=output)
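
For reference, a minimal standalone sketch of how the converted threshold computation in force_scheduler is expected to behave under PaddlePaddle. The cache_dic / current values below are hypothetical placeholders chosen for illustration, not values taken from the repository.

import paddle

# Hypothetical inputs mirroring what force_scheduler reads:
# a fixed refresh interval plus the current and total denoising step counters.
cache_dic = {"fresh_ratio": 0.0, "fresh_threshold": 5}
current = {"step": 10, "num_steps": 50}

linear_step_weight = 0.0  # both branches in the hunk above set this to 0.0

# paddle.to_tensor replaces torch.tensor; the expression itself is unchanged.
step_factor = paddle.to_tensor(
    1 - linear_step_weight + 2 * linear_step_weight * current["step"] / current["num_steps"]
)

# paddle.round replaces torch.round; with linear_step_weight == 0.0 the
# step_factor is exactly 1.0, so the forced-refresh threshold equals fresh_threshold.
threshold = paddle.round(cache_dic["fresh_threshold"] / step_factor)
print(float(threshold))  # 5.0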