Commit 9ff1922

oulgen authored and pytorchmergebot committed
[pallas backend] implement more ops (pytorch#167951)
Pull Request resolved: pytorch#167951
Approved by: https://github.com/jansel
ghstack dependencies: pytorch#167947
1 parent 5df0e49 commit 9ff1922

2 files changed, +283 -0 lines changed


test/inductor/test_pallas.py

Lines changed: 126 additions & 0 deletions
@@ -564,6 +564,132 @@ def fn(x, y):
         expected = fn(x, y)
         self.assertEqual(result, expected)

+    def test_where(self):
+        """Test torch.where operation."""
+
+        def fn(x, y):
+            return torch.where(x > 0, x, y)
+
+        compiled = self._compile(fn)
+
+        x = torch.randn(16, device=self.DEVICE)
+        y = torch.randn(16, device=self.DEVICE)
+        result = compiled(x, y)
+        expected = fn(x, y)
+        self.assertEqual(result, expected)
+
+    def test_clamp(self):
+        """Test torch.clamp operation."""
+
+        def fn(x):
+            return torch.clamp(x, -1.0, 1.0)
+
+        compiled = self._compile(fn)
+
+        x = torch.randn(16, device=self.DEVICE) * 2
+        result = compiled(x)
+        expected = fn(x)
+        self.assertEqual(result, expected)
+
+    def test_comparison_ops(self):
+        """Test comparison operations."""
+
+        def fn(a, b):
+            gt = a > b
+            lt = a < b
+            eq = a == b
+            return gt.float() + lt.float() + eq.float()
+
+        compiled = self._compile(fn)
+
+        a = torch.randn(16, device=self.DEVICE)
+        b = torch.randn(16, device=self.DEVICE)
+        result = compiled(a, b)
+        expected = fn(a, b)
+        self.assertEqual(result, expected)
+
+    def test_logical_ops(self):
+        """Test logical operations."""
+
+        def fn(a, b):
+            return torch.logical_and(a > 0, b > 0).float()
+
+        compiled = self._compile(fn)
+
+        a = torch.randn(16, device=self.DEVICE)
+        b = torch.randn(16, device=self.DEVICE)
+        result = compiled(a, b)
+        expected = fn(a, b)
+        self.assertEqual(result, expected)
+
+    def test_sign(self):
+        """Test sign operation."""
+
+        def fn(x):
+            return torch.sign(x)
+
+        compiled = self._compile(fn)
+
+        x = torch.randn(16, device=self.DEVICE)
+        result = compiled(x)
+        expected = fn(x)
+        self.assertEqual(result, expected)
+
+    def test_reciprocal(self):
+        """Test reciprocal operation."""
+
+        def fn(x):
+            return torch.reciprocal(x)
+
+        compiled = self._compile(fn)
+
+        x = torch.randn(16, device=self.DEVICE) + 1.0  # Avoid zeros
+        result = compiled(x)
+        expected = fn(x)
+        self.assertEqual(result, expected)
+
+    def test_square(self):
+        """Test square operation."""
+
+        def fn(x):
+            return torch.square(x)
+
+        compiled = self._compile(fn)
+
+        x = torch.randn(16, device=self.DEVICE)
+        result = compiled(x)
+        expected = fn(x)
+        self.assertEqual(result, expected)
+
+    def test_erf(self):
+        """Test erf operation."""
+        if self.DEVICE == "cuda":
+            self.skipTest("erf not supported in Pallas GPU (Triton) backend")
+
+        def fn(x):
+            return torch.erf(x)
+
+        compiled = self._compile(fn)
+
+        x = torch.randn(16, device=self.DEVICE)
+        result = compiled(x)
+        expected = fn(x)
+        self.assertEqual(result, expected)
+
+    def test_atan2(self):
+        """Test atan2 operation."""
+
+        def fn(a, b):
+            return torch.atan2(a, b)
+
+        compiled = self._compile(fn)
+
+        a = torch.randn(16, device=self.DEVICE)
+        b = torch.randn(16, device=self.DEVICE)
+        result = compiled(a, b)
+        expected = fn(a, b)
+        self.assertEqual(result, expected)
+

 @unittest.skipUnless(HAS_PALLAS, "requires jax and pallas")
 class PallasTestsCUDA(PallasTestsMixin, TestCase):
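Every new test follows the same eager-vs-compiled comparison pattern. A standalone sketch of that pattern, for illustration only: it uses plain torch.compile with the default Inductor backend and torch.testing.assert_close, whereas the tests above go through the mixin's _compile helper, DEVICE attribute, and self.assertEqual.

    import torch


    def fn(x, y):
        return torch.where(x > 0, x, y)


    # Eager execution is the reference; the compiled output must match it.
    compiled = torch.compile(fn)  # default Inductor backend, illustrative only
    x = torch.randn(16)
    y = torch.randn(16)
    torch.testing.assert_close(compiled(x, y), fn(x, y))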

torch/_inductor/codegen/pallas.py

Lines changed: 157 additions & 0 deletions
@@ -244,6 +244,163 @@ def view_as_complex(x: str) -> str:
         """View real tensor as complex tensor."""
         return f"({x}[..., 0] + 1j * {x}[..., 1])"

+    # Comparison operations
+    @staticmethod
+    def eq(a: str, b: str) -> str:
+        return f"({a} == {b})"
+
+    @staticmethod
+    def ne(a: str, b: str) -> str:
+        return f"({a} != {b})"
+
+    @staticmethod
+    def lt(a: str, b: str) -> str:
+        return f"({a} < {b})"
+
+    @staticmethod
+    def le(a: str, b: str) -> str:
+        return f"({a} <= {b})"
+
+    @staticmethod
+    def gt(a: str, b: str) -> str:
+        return f"({a} > {b})"
+
+    @staticmethod
+    def ge(a: str, b: str) -> str:
+        return f"({a} >= {b})"
+
+    # Logical operations
+    @staticmethod
+    def logical_and(a: str, b: str) -> str:
+        return f"jnp.logical_and({a}, {b})"
+
+    @staticmethod
+    def logical_or(a: str, b: str) -> str:
+        return f"jnp.logical_or({a}, {b})"
+
+    @staticmethod
+    def logical_not(x: str) -> str:
+        return f"jnp.logical_not({x})"
+
+    @staticmethod
+    def logical_xor(a: str, b: str) -> str:
+        return f"jnp.logical_xor({a}, {b})"
+
+    # Math operations
+    @staticmethod
+    def atan2(a: str, b: str) -> str:
+        return f"jnp.arctan2({a}, {b})"
+
+    @staticmethod
+    def hypot(a: str, b: str) -> str:
+        return f"jnp.hypot({a}, {b})"
+
+    @staticmethod
+    def fmod(a: str, b: str) -> str:
+        return f"jnp.fmod({a}, {b})"
+
+    @staticmethod
+    def remainder(a: str, b: str) -> str:
+        return f"jnp.remainder({a}, {b})"
+
+    @staticmethod
+    def clamp(x: str, min_val: str, max_val: str) -> str:
+        return f"jnp.clip({x}, {min_val}, {max_val})"
+
+    @staticmethod
+    def clip(x: str, min_val: str, max_val: str) -> str:
+        return f"jnp.clip({x}, {min_val}, {max_val})"
+
+    # Sign operations
+    @staticmethod
+    def sign(x: str) -> str:
+        return f"jnp.sign({x})"
+
+    @staticmethod
+    def signbit(x: str) -> str:
+        return f"jnp.signbit({x})"
+
+    # Special math functions
+    @staticmethod
+    def erf(x: str) -> str:
+        return f"jax.scipy.special.erf({x})"
+
+    @staticmethod
+    def erfc(x: str) -> str:
+        return f"jax.scipy.special.erfc({x})"
+
+    @staticmethod
+    def erfinv(x: str) -> str:
+        return f"jax.scipy.special.erfinv({x})"
+
+    @staticmethod
+    def lgamma(x: str) -> str:
+        return f"jax.scipy.special.gammaln({x})"
+
+    @staticmethod
+    def digamma(x: str) -> str:
+        return f"jax.scipy.special.digamma({x})"
+
+    # Reciprocal and square
+    @staticmethod
+    def reciprocal(x: str) -> str:
+        return f"jnp.reciprocal({x})"
+
+    @staticmethod
+    def square(x: str) -> str:
+        return f"jnp.square({x})"
+
+    # Additional operations
+    @staticmethod
+    def fma(a: str, b: str, c: str) -> str:
+        """Fused multiply-add: a * b + c"""
+        return f"jnp.fma({a}, {b}, {c})"
+
+    @staticmethod
+    def copysign(a: str, b: str) -> str:
+        return f"jnp.copysign({a}, {b})"
+
+    @staticmethod
+    def nextafter(a: str, b: str) -> str:
+        return f"jnp.nextafter({a}, {b})"
+
+    @staticmethod
+    def ldexp(a: str, b: str) -> str:
+        return f"jnp.ldexp({a}, {b})"
+
+    @staticmethod
+    def frexp(x: str) -> str:
+        return f"jnp.frexp({x})"
+
+    @staticmethod
+    def modf(x: str) -> str:
+        return f"jnp.modf({x})"
+
+    # Bitwise operations
+    @staticmethod
+    def bitwise_and(a: str, b: str) -> str:
+        return f"jnp.bitwise_and({a}, {b})"
+
+    @staticmethod
+    def bitwise_or(a: str, b: str) -> str:
+        return f"jnp.bitwise_or({a}, {b})"
+
+    @staticmethod
+    def bitwise_xor(a: str, b: str) -> str:
+        return f"jnp.bitwise_xor({a}, {b})"
+
+    @staticmethod
+    def bitwise_not(x: str) -> str:
+        return f"jnp.bitwise_not({x})"
+
+    @staticmethod
+    def left_shift(a: str, b: str) -> str:
+        return f"jnp.left_shift({a}, {b})"
+
+    @staticmethod
+    def right_shift(a: str, b: str) -> str:
+        return f"jnp.right_shift({a}, {b})"
+

 class PallasKernel(SIMDKernel):
     """
