|
1 | | -// RUN: triton-opt %s -split-input-file --intel-allocate-shared-memory --convert-triton-intel-gpu-to-llvm --convert-tritongen-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm |
| 1 | +// RUN: triton-opt %s -split-input-file --intel-allocate-shared-memory --convert-triton-intel-gpu-to-llvm --convert-tritongen-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm --dump-input-context=20 |
2 | 2 |
|
3 | 3 | module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} { |
4 | 4 | // CHECK: llvm.func spir_kernelcc @test_empty_kernel(%arg0: i64, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>) |
@@ -624,20 +624,11 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} { |
624 | 624 | // CHECK-NEXT: llvm.mlir.constant(1 : i32) : i32 |
625 | 625 | // CHECK-NEXT: llvm.mlir.constant(32 : i32) : i32 |
626 | 626 | // CHECK-NEXT: llvm.mlir.constant(512 : i32) : i32 |
627 | | - // CHECK-NEXT: llvm.mlir.constant(0 : i32) : i32 |
628 | 627 | // CHECK-NEXT: llvm.mul |
629 | | - // CHECK-NEXT: llvm.add |
630 | | - // CHECK-NEXT: llvm.mul |
631 | | - // CHECK-NEXT: llvm.add |
632 | | - // CHECK-NEXT: llvm.mul |
633 | | - // CHECK-NEXT: llvm.add |
634 | | - // CHECK-NEXT: llvm.add |
635 | | - // CHECK-NEXT: llvm.add |
636 | 628 | // CHECK-NEXT: llvm.getelementptr |
637 | 629 | %index = arith.constant 1 : i32 |
638 | | - %zero = arith.constant 0 : i32 |
639 | 630 | %0 = ttg.local_alloc : () -> !ttg.memdesc<128x16x32xf32, #shared0, #smem, mutable> |
640 | | - %1 = ttg.memdesc_subview %0[%index, %zero, %zero] : !ttg.memdesc<128x16x32xf32, #shared0, #smem, mutable> -> !ttg.memdesc<16x32xf32, #shared0, #smem, mutable> |
| 631 | + %1 = ttg.memdesc_index %0, %index : !ttg.memdesc<128x16x32xf32, #shared0, #smem, mutable> -> !ttg.memdesc<16x32xf32, #shared0, #smem, mutable> |
641 | 632 | tt.return |
642 | 633 | } |
643 | 634 | } |
@@ -1945,7 +1936,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 8 : i32, "ttg.thr |
1945 | 1936 | tt.func public @test_local_load_bf16() { |
1946 | 1937 | %c0_i32 = arith.constant 0 : i32 |
1947 | 1938 | %19 = ttg.local_alloc : () -> !ttg.memdesc<1x1x2048xbf16, #shared, #smem, mutable> |
1948 | | - %22 = ttg.memdesc_subview %19[%c0_i32, %c0_i32, %c0_i32] : !ttg.memdesc<1x1x2048xbf16, #shared, #ttg.shared_memory, mutable> -> !ttg.memdesc<1x2048xbf16, #shared, #ttg.shared_memory, mutable> |
| 1939 | + %22 = ttg.memdesc_index %19, %c0_i32 : !ttg.memdesc<1x1x2048xbf16, #shared, #ttg.shared_memory, mutable> -> !ttg.memdesc<1x2048xbf16, #shared, #ttg.shared_memory, mutable> |
1949 | 1940 | %39 = ttg.local_load %22 : !ttg.memdesc<1x2048xbf16, #shared, #ttg.shared_memory, mutable> -> tensor<1x2048xbf16, #blocked> |
1950 | 1941 | %40 = arith.extf %39 : tensor<1x2048xbf16, #blocked> to tensor<1x2048xf32, #blocked> |
1951 | 1942 | tt.return |
@@ -1977,7 +1968,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.targ |
1977 | 1968 | tt.func public @test_local_store_subview(%arg0: tensor<1xf32, #blocked>) { |
1978 | 1969 | %c0_i32 = arith.constant 0 : i32 |
1979 | 1970 | %0 = ttg.local_alloc {allocation.offset = 0 : i32} : () -> !ttg.memdesc<1xf32, #shared, #smem, mutable> |
1980 | | - %sv = ttg.memdesc_subview %0[%c0_i32] : !ttg.memdesc<1xf32, #shared, #smem, mutable> -> !ttg.memdesc<1xf32, #shared, #smem, mutable> |
| 1971 | + %sv = ttg.memdesc_subslice %0[0] : !ttg.memdesc<1xf32, #shared, #smem, mutable> -> !ttg.memdesc<1xf32, #shared, #smem, mutable> |
1981 | 1972 | ttg.local_store %arg0, %sv : tensor<1xf32, #blocked> -> !ttg.memdesc<1xf32, #shared, #smem, mutable> |
1982 | 1973 | tt.return |
1983 | 1974 | } |
|
0 commit comments