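fix: call module-level fp8/int8 triton quantize functions instead of self attributes, cast activation scale to float before the linear kernels, and enable VAE tiling when `use_tiling_vae` is set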

gushiqiao · gushiqiao · commit 25345ab50bcf · 2025-12-29T08:14:08.000Z
diff --git a/lightx2v/common/ops/mm/mm_weight.py b/lightx2v/common/ops/mm/mm_weight.py
@@ -1531,15 +1531,15 @@ def __init__(
         if ops is not None:
             self.act_quant_func = self.act_quant_fp8_perchannel_sym_vllm
         else:
-            self.act_quant_func = self.fp8_quantize_triton
+            self.act_quant_func = fp8_quantize_triton
 
     def apply(self, input_tensor):
         input_tensor_quant, input_tensor_scale = self.act_quant_func(input_tensor)
         output_tensor = fp8_linear(
             input_tensor_quant,
             self.weight,
             self.bias.float() if self.bias is not None else None,
-            input_tensor_scale,
+            input_tensor_scale.float(),
             self.weight_scale,
             out_dtype=self.infer_dtype,
         )
@@ -1582,15 +1582,15 @@ def __init__(
         if ops is not None:
             self.act_quant_func = self.act_quant_int8_perchannel_sym_vllm
         else:
-            self.act_quant_func = self.int8_quantize_triton
+            self.act_quant_func = int8_quantize_triton
 
     def apply(self, input_tensor):
         input_tensor_quant, input_tensor_scale = self.act_quant_func(input_tensor)
         output_tensor = q8_linear(
             input_tensor_quant,
             self.weight,
             self.bias.float() if self.bias is not None else None,
-            input_tensor_scale,
+            input_tensor_scale.float(),
             self.weight_scale,
             fuse_gelu=False,
             out_dtype=self.infer_dtype,
diff --git a/lightx2v/models/video_encoders/hf/qwen_image/vae.py b/lightx2v/models/video_encoders/hf/qwen_image/vae.py
@@ -36,6 +36,8 @@ def load(self):
         with open(os.path.join(self.config["model_path"], "vae", "config.json"), "r") as f:
             vae_config = json.load(f)
             self.vae_scale_factor = 2 ** len(vae_config["temperal_downsample"]) if "temperal_downsample" in vae_config else 8
+        if self.config.get("use_tiling_vae", False):
+            self.model.enable_tiling()
 
     @staticmethod
     def _unpack_latents(latents, height, width, vae_scale_factor):