|
1 |
| -// RUN: triton-opt %s -split-input-file --intel-allocate-shared-memory --convert-triton-intel-gpu-to-llvm --convert-tritongen-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm |
| 1 | +// RUN: triton-opt %s -split-input-file --intel-allocate-shared-memory --convert-triton-intel-gpu-to-llvm --convert-tritongen-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm --dump-input-context=20 |
2 | 2 |
|
3 | 3 | module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
|
4 | 4 | // CHECK: llvm.func spir_kernelcc @test_empty_kernel(%arg0: i64, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>)
|
@@ -624,20 +624,11 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
|
624 | 624 | // CHECK-NEXT: llvm.mlir.constant(1 : i32) : i32
|
625 | 625 | // CHECK-NEXT: llvm.mlir.constant(32 : i32) : i32
|
626 | 626 | // CHECK-NEXT: llvm.mlir.constant(512 : i32) : i32
|
627 |
| - // CHECK-NEXT: llvm.mlir.constant(0 : i32) : i32 |
628 | 627 | // CHECK-NEXT: llvm.mul
|
629 |
| - // CHECK-NEXT: llvm.add |
630 |
| - // CHECK-NEXT: llvm.mul |
631 |
| - // CHECK-NEXT: llvm.add |
632 |
| - // CHECK-NEXT: llvm.mul |
633 |
| - // CHECK-NEXT: llvm.add |
634 |
| - // CHECK-NEXT: llvm.add |
635 |
| - // CHECK-NEXT: llvm.add |
636 | 628 | // CHECK-NEXT: llvm.getelementptr
|
637 | 629 | %index = arith.constant 1 : i32
|
638 |
| - %zero = arith.constant 0 : i32 |
639 | 630 | %0 = ttg.local_alloc : () -> !ttg.memdesc<128x16x32xf32, #shared0, #smem, mutable>
|
640 |
| - %1 = ttg.memdesc_subview %0[%index, %zero, %zero] : !ttg.memdesc<128x16x32xf32, #shared0, #smem, mutable> -> !ttg.memdesc<16x32xf32, #shared0, #smem, mutable> |
| 631 | + %1 = ttg.memdesc_index %0, %index : !ttg.memdesc<128x16x32xf32, #shared0, #smem, mutable> -> !ttg.memdesc<16x32xf32, #shared0, #smem, mutable> |
641 | 632 | tt.return
|
642 | 633 | }
|
643 | 634 | }
|
@@ -1945,7 +1936,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 8 : i32, "ttg.thr
|
1945 | 1936 | tt.func public @test_local_load_bf16() {
|
1946 | 1937 | %c0_i32 = arith.constant 0 : i32
|
1947 | 1938 | %19 = ttg.local_alloc : () -> !ttg.memdesc<1x1x2048xbf16, #shared, #smem, mutable>
|
1948 |
| - %22 = ttg.memdesc_subview %19[%c0_i32, %c0_i32, %c0_i32] : !ttg.memdesc<1x1x2048xbf16, #shared, #ttg.shared_memory, mutable> -> !ttg.memdesc<1x2048xbf16, #shared, #ttg.shared_memory, mutable> |
| 1939 | + %22 = ttg.memdesc_index %19, %c0_i32 : !ttg.memdesc<1x1x2048xbf16, #shared, #ttg.shared_memory, mutable> -> !ttg.memdesc<1x2048xbf16, #shared, #ttg.shared_memory, mutable> |
1949 | 1940 | %39 = ttg.local_load %22 : !ttg.memdesc<1x2048xbf16, #shared, #ttg.shared_memory, mutable> -> tensor<1x2048xbf16, #blocked>
|
1950 | 1941 | %40 = arith.extf %39 : tensor<1x2048xbf16, #blocked> to tensor<1x2048xf32, #blocked>
|
1951 | 1942 | tt.return
|
@@ -1977,7 +1968,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.targ
|
1977 | 1968 | tt.func public @test_local_store_subview(%arg0: tensor<1xf32, #blocked>) {
|
1978 | 1969 | %c0_i32 = arith.constant 0 : i32
|
1979 | 1970 | %0 = ttg.local_alloc {allocation.offset = 0 : i32} : () -> !ttg.memdesc<1xf32, #shared, #smem, mutable>
|
1980 |
| - %sv = ttg.memdesc_subview %0[%c0_i32] : !ttg.memdesc<1xf32, #shared, #smem, mutable> -> !ttg.memdesc<1xf32, #shared, #smem, mutable> |
| 1971 | + %sv = ttg.memdesc_subslice %0[0] : !ttg.memdesc<1xf32, #shared, #smem, mutable> -> !ttg.memdesc<1xf32, #shared, #smem, mutable> |
1981 | 1972 | ttg.local_store %arg0, %sv : tensor<1xf32, #blocked> -> !ttg.memdesc<1xf32, #shared, #smem, mutable>
|
1982 | 1973 | tt.return
|
1983 | 1974 | }
|
|
0 commit comments