Skip to content

Commit 0225d67

Browse files
authored
[Relax][PyTorch] Fix MultiheadAttention compile (#18459)
## Related Issues closes #18440 ## Why - PyTorch `masked_fill` / `full_like` accept inf or nan, and TVM couldn’t handle these values when the tensor dtype was not float, which caused wrong behavior or errors. ## How - If `fill_value` is inf or nan and the tensor dtype is not float → convert the fill to float32. - For masked_fill → create a float values tensor with full_like. - Cast the input to float if needed. - In TOPI → reject creating full with inf/nan on non-float dtypes.
1 parent a9955e5 commit 0225d67

File tree

2 files changed

+47
-5
lines changed

2 files changed

+47
-5
lines changed

python/tvm/relax/frontend/torch/base_fx_graph_translator.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2085,8 +2085,16 @@ def _full(self, node: fx.Node) -> relax.Var:
20852085

20862086
def _full_like(self, node: fx.Node) -> relax.Var:
    """Convert a torch ``full_like`` FX node to a Relax ``full_like`` op.

    PyTorch accepts ``inf``/``nan`` fill values for tensors of any dtype,
    but TVM cannot represent non-finite values in non-float tensors. When
    the fill value is non-finite and the input dtype is not floating
    point, the output dtype is promoted to ``float32``.
    """
    x = self.env[node.args[0]]
    value = node.args[1]
    fill_value = relax.const(value)

    # Promote only for non-finite fills on non-float inputs. Checking
    # "float" as a substring also matches "bfloat16" (and float16/32/64),
    # so no separate bfloat16 test is needed.
    fill_dtype = None
    if isinstance(value, (int, float)) and not math.isfinite(value):
        if "float" not in x.struct_info.dtype:
            fill_dtype = "float32"

    return self.block_builder.emit(relax.op.full_like(x, fill_value, dtype=fill_dtype))
20902098

20912099
def _index_select(self, node: fx.Node) -> relax.Var:
20922100
x = self.env[node.args[0]]
@@ -2099,7 +2107,19 @@ def _inplace_masked_fill(self, node: fx.Node) -> relax.Var:
20992107
mask = self.env[node.args[1]]
21002108
value = node.args[2]
21012109
rx_value = relax.const(value)
2102-
values = self.block_builder.emit(relax.op.full_like(x, rx_value))
2110+
2111+
x_dtype = x.struct_info.dtype
2112+
fill_dtype = None
2113+
if isinstance(value, (int, float)) and (math.isinf(value) or math.isnan(value)):
2114+
if not ("float" in x_dtype or "bfloat16" in x_dtype):
2115+
fill_dtype = "float32"
2116+
2117+
values = self.block_builder.emit(relax.op.full_like(x, rx_value, dtype=fill_dtype))
2118+
2119+
# Cast x to match values dtype if necessary
2120+
if fill_dtype is not None:
2121+
x = self.block_builder.emit(relax.op.astype(x, fill_dtype))
2122+
21032123
output = self.block_builder.emit(relax.op.where(mask, values, x))
21042124
self.env[node.args[0]] = output
21052125
return output
@@ -2130,8 +2150,21 @@ def _linspace(self, node: fx.Node) -> relax.Var:
21302150
def _masked_fill(self, node: fx.Node) -> relax.Var:
    """Convert a torch ``masked_fill`` FX node to Relax.

    Builds a tensor filled with the given value via ``full_like`` and
    selects between it and the input with ``where``. Non-finite fill
    values (inf/nan) cannot be stored in non-float tensors, so in that
    case both the fill tensor and the input are promoted to ``float32``.
    """
    x = self.env[node.args[0]]
    mask = self.env[node.args[1]]
    value = node.args[2]
    rx_value = relax.const(value)

    # Promote only for non-finite fills on non-float inputs. Checking
    # "float" as a substring also matches "bfloat16" (and float16/32/64),
    # so no separate bfloat16 test is needed.
    fill_dtype = None
    if isinstance(value, (int, float)) and not math.isfinite(value):
        if "float" not in x.struct_info.dtype:
            fill_dtype = "float32"

    values = self.block_builder.emit(relax.op.full_like(x, rx_value, dtype=fill_dtype))

    # Cast x so that where() sees matching operand dtypes.
    if fill_dtype is not None:
        x = self.block_builder.emit(relax.op.astype(x, fill_dtype))

    return self.block_builder.emit(relax.op.where(mask, values, x))
21362169

21372170
def _new_ones(self, node: fx.Node) -> relax.Var:

python/tvm/topi/tensor.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
# pylint: disable=invalid-name,consider-using-enumerate,unused-argument,len-as-condition
1818
"""Elementwise operators"""
1919

20+
import math as _math
21+
2022
from typing import Optional
2123

2224
from tvm import te
@@ -57,6 +59,13 @@ def full(shape, dtype, fill_value):
5759
y : tvm.te.Tensor
5860
The result.
5961
"""
62+
63+
if isinstance(fill_value, (int, float)) and (
64+
_math.isinf(fill_value) or _math.isnan(fill_value)
65+
):
66+
if not ("float" in dtype or "bfloat16" in dtype):
67+
raise ValueError("Infinite and NaN require a floating-point dtype.")
68+
6069
return cpp.full(shape, dtype, fill_value)
6170

6271

0 commit comments

Comments
 (0)