[language][standard] added tl.squeeze / tl.unsqueeze (#8924)

ptillet · web-flow · commit 343052b23b40 · 2025-12-06T12:36:46.000-08:00
diff --git a/python/test/unit/language/test_standard.py b/python/test/unit/language/test_standard.py
@@ -143,3 +143,43 @@ def swizzle2d_kernel(output, size_i, size_j, size_g):
     expected_order = torch.tensor([[0, 3, 6, 9, 12, 15, 18], [1, 4, 7, 10, 13, 16, 19], [2, 5, 8, 11, 14, 17, 20],
                                    [21, 23, 25, 27, 29, 31, 33], [22, 24, 26, 28, 30, 32, 34]]).to(device)
     assert (output == expected_order).all(), (output, expected_order)
+
+
+@pytest.mark.interpreter
+@pytest.mark.parametrize("shape, dim", [((1, 2, 4), 0), ((2, 1, 4), 1), ((2, 4, 1), 2)])
+def test_squeeze(shape, dim, device):  # compares tl.squeeze with torch's squeeze on an 8-element arange
+
+    @triton.jit
+    def triton_squeeze(out_ptr, dim: tl.constexpr, s0: tl.constexpr, s1: tl.constexpr, s2: tl.constexpr):
+        a = tl.arange(0, 8)
+        a = tl.reshape(a, (s0, s1, s2))  # each parametrized shape has 8 elements and extent 1 at `dim`
+        a = tl.squeeze(a, dim)
+        a = tl.ravel(a)  # flatten so the result can be stored at 8 consecutive offsets
+        tl.store(out_ptr + tl.arange(0, 8), a)
+
+    out = torch.empty((8, ), device=device, dtype=torch.int32)
+    triton_squeeze[(1, )](out, dim, shape[0], shape[1], shape[2])  # launched with grid (1, )
+
+    expected = torch.arange(0, 8, device=device, dtype=torch.int32)
+    expected = expected.reshape(shape).squeeze(dim).reshape(-1)  # torch reference: same reshape/squeeze/flatten
+    assert (out == expected).all()  # NOTE(review): both sides are flattened, so only element order is checked
+
+
+@pytest.mark.interpreter
+@pytest.mark.parametrize("dim", [0, 1, 2])  # insertion positions tried on a (2, 4) input
+def test_unsqueeze(dim, device):  # compares tl.unsqueeze with torch's unsqueeze on an 8-element arange
+
+    @triton.jit
+    def triton_unsqueeze(out_ptr, dim: tl.constexpr):
+        a = tl.arange(0, 8)
+        a = tl.reshape(a, (2, 4))
+        a = tl.unsqueeze(a, dim)
+        a = tl.ravel(a)  # flatten so the result can be stored at 8 consecutive offsets
+        tl.store(out_ptr + tl.arange(0, 8), a)
+
+    out = torch.empty((8, ), device=device, dtype=torch.int32)
+    triton_unsqueeze[(1, )](out, dim)  # launched with grid (1, )
+
+    expected = torch.arange(0, 8, device=device, dtype=torch.int32)
+    expected = expected.reshape(2, 4).unsqueeze(dim).reshape(-1)  # torch reference: same reshape/unsqueeze/flatten
+    assert (out == expected).all()  # NOTE(review): both sides are flattened, so only element order is checked
diff --git a/python/triton/language/__init__.py b/python/triton/language/__init__.py
@@ -19,9 +19,11 @@
     sigmoid,
     softmax,
     sort,
+    squeeze,
     sum,
     swizzle2d,
     topk,
+    unsqueeze,
     xor_sum,
     zeros,
     zeros_like,
@@ -253,6 +255,7 @@
     "split",
     "sqrt",
     "sqrt_rn",
+    "squeeze",
     "static_assert",
     "static_print",
     "static_range",
@@ -272,6 +275,7 @@
     "uint8",
     "uint_to_uniform_float",
     "umulhi",
+    "unsqueeze",
     "view",
     "void",
     "where",
diff --git a/python/triton/language/standard.py b/python/triton/language/standard.py
@@ -534,3 +534,14 @@ def interleave(a, b):
         # understand that if we take the `if` above we definitely don't run this
         # `else`.
         return core.reshape(c, c.shape[:-2] + [2 * c.shape[-2]])
+
+
+@jit
+def squeeze(x, dim: core.constexpr):  # returns `x` reshaped to its shape with axis `dim` left out
+    core.static_assert(x.shape[dim] == 1)  # the axis being dropped must have extent 1
+    return x.reshape(x.shape[:dim] + x.shape[dim + 1:])  # NOTE(review): dim == -1 makes `dim + 1:` equal `0:`; confirm negative dims are out of scope
+
+
+@jit
+def unsqueeze(x, dim: core.constexpr):  # returns `x` reshaped to its shape with an extent-1 axis placed at index `dim`
+    return x.reshape(x.shape[:dim] + (1, ) + x.shape[dim:])  # NOTE(review): assuming Python slice semantics, dim == -1 inserts before the last axis, unlike torch.unsqueeze; confirm intent