|
1 | 1 | // RUN: triton-opt %s -split-input-file --allocate-shared-memory --convert-triton-intel-gpu-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm |
2 | 2 |
|
| 3 | +// CHECK-DAG: llvm.func spir_funccc @_Z45intel_sub_group_2d_block_prefetch_16b_4r16x1cPU3AS1viiiDv2_i(!llvm.ptr<1> {llvm.nonnull}, i32, i32, i32, vector<2xi32>) attributes {memory_effects = #llvm.memory_effects<other = none, argMem = read, inaccessibleMem = none>, no_unwind} |
3 | 4 | // CHECK-DAG: llvm.func spir_funccc @_Z45intel_sub_group_2d_block_prefetch_16b_4r16x2cPU3AS1viiiDv2_i(!llvm.ptr<1> {llvm.nonnull}, i32, i32, i32, vector<2xi32>) attributes {memory_effects = #llvm.memory_effects<other = none, argMem = read, inaccessibleMem = none>, no_unwind} |
4 | 5 | module attributes {"ttg.num-warps" = 8 : i32, "ttg.threads-per-warp" = 16 : i32} { |
5 | 6 | tt.func public @matmul_with_prefetch(%arg0: !tt.ptr<f16>, %arg1: !tt.ptr<f16>, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64) { |
@@ -36,7 +37,7 @@ module attributes {"ttg.num-warps" = 8 : i32, "ttg.threads-per-warp" = 16 : i32} |
36 | 37 | // CHECK: %[[VAL_57:.*]] = llvm.mlir.constant(0 : i32) : i32 |
37 | 38 | // CHECK: %[[VAL_59:.*]] = llvm.insertelement %[[COLUMN_MAJOR_WARP_OFF_X]], {{.*}}{{\[}}%[[VAL_57]] : i32] : vector<2xi32> |
38 | 39 | // CHECK: %[[ROW_MAJOR_COORD:.*]] = llvm.insertelement %[[COLUMN_MAJOR_WARP_OFF_Y]], {{.*}}{{\[}}%[[VAL_56]] : i32] : vector<2xi32> |
39 | | - // CHECK: llvm.call spir_funccc @_Z45intel_sub_group_2d_block_prefetch_16b_4r16x2cPU3AS1viiiDv2_i(%[[ROW_MAJOR_BASE]], %[[ROW_MAJOR_WIDTH]], %[[ROW_MAJOR_HEIGHT]], %[[ROW_MAJOR_STRIDE]], %[[ROW_MAJOR_COORD]]) {{.*}} : (!llvm.ptr<1>{{.*}}, i32, i32, i32, vector<2xi32>) -> () |
| 40 | + // CHECK: llvm.call spir_funccc @_Z45intel_sub_group_2d_block_prefetch_16b_4r16x1cPU3AS1viiiDv2_i(%[[ROW_MAJOR_BASE]], %[[ROW_MAJOR_WIDTH]], %[[ROW_MAJOR_HEIGHT]], %[[ROW_MAJOR_STRIDE]], %[[ROW_MAJOR_COORD]]) {{.*}} : (!llvm.ptr<1>{{.*}}, i32, i32, i32, vector<2xi32>) -> () |
40 | 41 | %rowMajorPtr = tt.make_tensor_ptr %arg0, [%arg2, %arg4], [%arg5, %c1_i64], [%c0_i32, %c0_i32] {order = array<i32: 1, 0>} : <tensor<32x16xf16>> |
41 | 42 | triton_intel_gpu.prefetch %rowMajorPtr {cache = 1 : i32, evict = 1 : i32, isVolatile = false, triton_intel_gpu.block_io = "row_major"} : !tt.ptr<tensor<32x16xf16>> |
42 | 43 |
|
|
0 commit comments