@@ -55,7 +55,7 @@ func @addptr_ops(%scalar_ptr: !tt.ptr<f32>, %scalar_i32: i32) {
 }
 
 func @load_store_ops_scalar(%ptr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %mask: i1) {
-  // Test if Load/Store ops can handle scalar values (see #XXX)
+  // Test if Load/Store ops can handle scalar values
   %other = arith.constant 0.0e+0 : f32
 
   // load scalar
@@ -75,3 +75,58 @@ func @load_store_ops_scalar(%ptr: !tt.ptr<f32> {tt.divisibility = 16 : i32}, %ma
   tt.store %ptr, %c, %mask : f32
   return
 }
+
+func @reduce_ops_infer(%ptr: !tt.ptr<f32>, %v: tensor<1x2x4xf32>) {
+  // Test if reduce ops infer types correctly
+
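+  // tt.reduce drops the reduced axis from the result shape: reducing
+  // tensor<1x2x4xf32> along axis 0 gives tensor<2x4xf32>, and reducing a
+  // 1-D tensor gives a plain scalar (f32).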
+  // CHECK: %{{.*}} = tt.reduce %{{.*}} -> tensor<2x4xf32>
+  %a = tt.reduce %v {redOp = 1 : i32, axis = 0 : i32} : tensor<1x2x4xf32> -> tensor<2x4xf32>
+  // CHECK: %{{.*}} = tt.reduce %{{.*}} -> tensor<1x4xf32>
+  %b = tt.reduce %v {redOp = 1 : i32, axis = 1 : i32} : tensor<1x2x4xf32> -> tensor<1x4xf32>
+  // CHECK: %{{.*}} = tt.reduce %{{.*}} -> tensor<1x2xf32>
+  %c = tt.reduce %v {redOp = 1 : i32, axis = 2 : i32} : tensor<1x2x4xf32> -> tensor<1x2xf32>
+  // CHECK: %{{.*}} = tt.reduce %{{.*}} -> tensor<1xf32>
+  %e = tt.reduce %b {redOp = 1 : i32, axis = 1 : i32} : tensor<1x4xf32> -> tensor<1xf32>
+  // CHECK: %{{.*}} = tt.reduce %{{.*}} -> tensor<4xf32>
+  %f = tt.reduce %a {redOp = 1 : i32, axis = 0 : i32} : tensor<2x4xf32> -> tensor<4xf32>
+  // CHECK: %{{.*}} = tt.reduce %{{.*}} -> f32
+  %g = tt.reduce %f {redOp = 1 : i32, axis = 0 : i32} : tensor<4xf32> -> f32
+
+  // Avoid optimizations for c, e, and g
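+  // (the stores below keep %c, %e, and %g live so the reduces above cannot be dropped)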
+  %ptr1x2 = tt.splat %ptr : (!tt.ptr<f32>) -> tensor<1x2x!tt.ptr<f32>>
+  %ptr1 = tt.splat %ptr : (!tt.ptr<f32>) -> tensor<1x!tt.ptr<f32>>
+  tt.store %ptr1x2, %c : tensor<1x2xf32>
+  tt.store %ptr1, %e : tensor<1xf32>
+  tt.store %ptr, %g : f32
+  return
+}
+
+func @dot_ops_infer(%ptr: !tt.ptr<f32>, %v: f32) {
+  // Test if dot ops infer types correctly
+  %v128x32 = tt.splat %v : (f32) -> tensor<128x32xf32>
+  %v32x128 = tt.splat %v : (f32) -> tensor<32x128xf32>
+  %v128x1 = tt.splat %v : (f32) -> tensor<128x1xf32>
+  %v1x128 = tt.splat %v : (f32) -> tensor<1x128xf32>
+
+  %zero128x128 = arith.constant dense<0.00e+00> : tensor<128x128xf32>
+  %zero32x32 = arith.constant dense<0.00e+00> : tensor<32x32xf32>
+  %zero1x1 = arith.constant dense<0.00e+00> : tensor<1x1xf32>
+
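+  // tt.dot multiplies an MxK operand by a KxN operand and adds an MxN
+  // accumulator, so each result below takes the accumulator's MxN shape.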
+  // CHECK: %{{.*}} = tt.dot %{{.*}} -> tensor<128x128xf32>
+  %r1 = tt.dot %v128x32, %v32x128, %zero128x128 {allowTF32 = true} : tensor<128x32xf32> * tensor<32x128xf32> -> tensor<128x128xf32>
+  // CHECK: %{{.*}} = tt.dot %{{.*}} -> tensor<32x32xf32>
+  %r2 = tt.dot %v32x128, %v128x32, %zero32x32 {allowTF32 = true} : tensor<32x128xf32> * tensor<128x32xf32> -> tensor<32x32xf32>
+  // CHECK: %{{.*}} = tt.dot %{{.*}} -> tensor<128x128xf32>
+  %r3 = tt.dot %v128x1, %v1x128, %zero128x128 {allowTF32 = true} : tensor<128x1xf32> * tensor<1x128xf32> -> tensor<128x128xf32>
+  // CHECK: %{{.*}} = tt.dot %{{.*}} -> tensor<1x1xf32>
+  %r4 = tt.dot %v1x128, %v128x1, %zero1x1 {allowTF32 = true} : tensor<1x128xf32> * tensor<128x1xf32> -> tensor<1x1xf32>
+
+  %ptr128x128 = tt.splat %ptr : (!tt.ptr<f32>) -> tensor<128x128x!tt.ptr<f32>>
+  %ptr32x32 = tt.splat %ptr : (!tt.ptr<f32>) -> tensor<32x32x!tt.ptr<f32>>
+  %ptr1x1 = tt.splat %ptr : (!tt.ptr<f32>) -> tensor<1x1x!tt.ptr<f32>>
+  tt.store %ptr128x128, %r1 : tensor<128x128xf32>
+  tt.store %ptr32x32, %r2 : tensor<32x32xf32>
+  tt.store %ptr128x128, %r3 : tensor<128x128xf32>
+  tt.store %ptr1x1, %r4 : tensor<1x1xf32>
+  return
+}