@@ -5,7 +5,7 @@ module attributes {"triton_intel_gpu.support_sg_2d_block", "triton_intel_gpu.sup
5
5
// CHECK-DAG: llvm.func spir_funccc @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1viiiDv2_iPv(i32, i32, i32, i32, !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return}
6
6
// CHECK-DAG: llvm.func spir_funccc @_Z41__spirv_Subgroup2DBlockLoadTransformINTELiiiiPU3AS1viiiDv2_iPv(i32, i32, i32, i32, !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return}
7
7
// CHECK-DAG: llvm.func spir_funccc @_Z33__spirv_Subgroup2DBlockStoreINTELiiiiPvPU3AS1viiiDv2_i(i32, i32, i32, i32, !llvm.ptr {llvm.nonnull, llvm.readonly}, !llvm.ptr<1> {llvm.nonnull, llvm.writeonly}, i32, i32, i32, vector<2xi32>) attributes {no_unwind, will_return}
8
- // CHECK-DAG: llvm.func spir_funccc @_Z45intel_sub_group_2d_block_prefetch_16b_8r16x2cPU3AS1viiiDv2_i( !llvm.ptr<1> {llvm.nonnull}, i32, i32, i32, vector<2xi32>) attributes {memory_effects = #llvm.memory_effects<other = none, argMem = read, inaccessibleMem = none>, no_unwind}
8
+ // CHECK-DAG: llvm.func spir_funccc @_Z36__spirv_Subgroup2DBlockPrefetchINTELiiiiPU3AS1viiiDv2_i(i32, i32, i32, i32, !llvm.ptr<1> {llvm.nonnull}, i32, i32, i32, vector<2xi32>) attributes {memory_effects = #llvm.memory_effects<other = none, argMem = read, inaccessibleMem = none>, no_unwind}
9
9
10
10
tt.func public @matmul_kernel_with_block_pointers (%arg0: !tt.ptr <f16 , 1 >, %arg1: !tt.ptr <f16 , 1 >, %arg2: !tt.ptr <f32 , 1 >, %arg3: i32 , %arg4: i32 , %arg5: i32 ) {
11
11
// CHECK-LABEL: @matmul_kernel_with_block_pointers
@@ -46,7 +46,7 @@ module attributes {"triton_intel_gpu.support_sg_2d_block", "triton_intel_gpu.sup
46
46
// CHECK-NEXT: [[INSERT1:%.*]] = llvm.insertelement {{.*}}, [[INSERT0]][[[ONE]] : i32] : vector<2xi32>
47
47
%14 = tt.make_tensor_ptr %arg0 , [%c4096_i64 , %c4096_i64 ], [%c4096_i64 , %c1_i64 ], [%13 , %c0_i32 ] {order = array<i32 : 1 , 0 >} : <tensor <8 x32 xf16 >, 1 >
48
48
49
- // CHECK: llvm.call spir_funccc @_Z45intel_sub_group_2d_block_prefetch_16b_8r16x2cPU3AS1viiiDv2_i( %arg0, {{.*}})
49
+ // CHECK: llvm.call spir_funccc @_Z36__spirv_Subgroup2DBlockPrefetchINTELiiiiPU3AS1viiiDv2_i({{.*}}, %arg0, {{.*}})
50
50
triton_intel_gpu.prefetch %14 {cache = 1 : i32 , evict = 1 : i32 , isVolatile = false } : !tt.ptr <tensor <8 x32 xf16 >, 1 >
51
51
%18 = arith.divsi %1 , %c4_i32 : i32
52
52
%19 = arith.andi %18 , %c7_i32 : i32
0 commit comments