|
| 1 | +// RUN: triton-opt %s -split-input-file -allow-unregistered-dialect -tritongpu-assign-latencies=num-stages=3 -canonicalize | FileCheck %s |
| 2 | + |
| 3 | +// Test that a ub.poison op producing a ptr<tensor> value gets the correct rank in the |
| 4 | +// AxisInfo analysis (rank=2 for tensor<128x64>, not rank=1). |
| 5 | +#mma = #ttig.dpas<{repeatCount = 8, systolicDepth = 8, executionSize = 16, opsPerChan = 2, threadsPerWarp = 16, warpsPerCTA = [2, 2], repCluster = [4, 2], A = [32, 16], B = [16, 32], C = [32, 32]}> |
| 6 | +// CHECK-LABEL: @test_poison_rank |
| 7 | +module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.threads-per-warp" = 16 : i32} { |
| 8 | + tt.func public @test_poison_rank(%arg0: !tt.ptr<f16>, %lb: i32, %ub: i32) { |
| 9 | + %c0_i32 = arith.constant 0 : i32 |
| 10 | + %c1_i32 = arith.constant 1 : i32 |
| 11 | + %c1_i64 = arith.constant 1 : i64 |
| 12 | + %c128_i64 = arith.constant 128 : i64 |
| 13 | + %c64_i64 = arith.constant 64 : i64 |
| 14 | + // Poison value of block-pointer type (128x64xf16, dot-operand A layout); used as the loop's initial iter_arg. |
| 15 | + %0 = ub.poison : !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>> |
| 16 | + // Block pointer over %arg0 with shape [128, 64], strides [64, 1], offsets [0, 0]; %1 is not used below. |
| 17 | + %1 = tt.make_tensor_ptr %arg0, [%c128_i64, %c64_i64], [%c64_i64, %c1_i64], [%c0_i32, %c0_i32] {order = array<i32: 1, 0>} : <tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>> |
| 18 | + // Loop from %lb to %ub (step 1) whose only loop-carried value starts as the poison pointer %0. |
| 19 | + %result = scf.for %i = %lb to %ub step %c1_i32 |
| 20 | + iter_args(%ptr = %0) -> !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>> : i32 { |
| 21 | + // Advance the carried pointer by offsets [0, 0]. |
| 22 | + %advanced = tt.advance %ptr, [%c0_i32, %c0_i32] : <tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>> |
| 23 | + // Carry the advanced pointer into the next iteration as %ptr. |
| 24 | + scf.yield %advanced : !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>> |
| 25 | + } |
| 26 | + // %result is not used; the function returns no values. |
| 27 | + tt.return |
| 28 | + } |
| 29 | +} |
0 commit comments