Skip to content

Commit 05c889a

Browse files
committed
Update test check.
1 parent 2546a37 commit 05c889a

File tree

1 file changed

+22
-28
lines changed

1 file changed

+22
-28
lines changed

mlir/test/Conversion/XeGPUToXeVM/prefetch_nd.mlir

Lines changed: 22 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,40 +1,34 @@
11
// RUN: mlir-opt -convert-xegpu-to-xevm %s | FileCheck %s
22

33
gpu.module @prefetch_nd_check {
4+
// CHECK-LABEL: gpu.func @prefetch_nd(
5+
// CHECK-SAME: %[[ARG0:.*]]: memref<8x16xf32, 1>, %[[ARG1:.*]]: memref<8x16xf32, 1>) kernel {
46
gpu.func @prefetch_nd(%src: memref<8x16xf32, 1>, %dst: memref<8x16xf32, 1>) kernel {
7+
// CHECK: %[[MEMSPACECAST:.*]] = memref.memory_space_cast %[[ARG0]] : memref<8x16xf32, 1> to memref<8x16xf32>
8+
// CHECK: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[MEMSPACECAST]] : memref<8x16xf32> -> index
9+
// CHECK: %[[VAR0:.*]] = arith.index_castui %[[INTPTR]] : index to i64
510
%srcce = memref.memory_space_cast %src : memref<8x16xf32, 1> to memref<8x16xf32>
11+
// CHECK: %[[MEMSPACECAST_0:.*]] = memref.memory_space_cast %[[ARG1]] : memref<8x16xf32, 1> to memref<8x16xf32>
612
%dstte = memref.memory_space_cast %dst : memref<8x16xf32, 1> to memref<8x16xf32>
713

8-
// CHECK: %[[LD_PTR_AS_I64:.*]] = arith.index_castui {{.*}} : index to i64
9-
// CHECK: %[[LD_CREATE_DESC_I64:.*]] = vector.bitcast {{.*}} : vector<8xi32> to vector<4xi64>
10-
// CHECK: %[[LD_DESC_0:.*]] = vector.insert %[[LD_PTR_AS_I64]], %[[LD_CREATE_DESC_I64]] [0] : i64 into vector<4xi64>
11-
// CHECK: %[[LD_DESC_1:.*]] = vector.bitcast %[[LD_DESC_0]] : vector<4xi64> to vector<8xi32>
12-
// CHECK: %[[LD_DESC_2:.*]] = vector.insert {{.*}}, %[[LD_DESC_1]] [2] : i32 into vector<8xi32>
13-
// CHECK: %[[LD_DESC_3:.*]] = vector.insert {{.*}}, %[[LD_DESC_2]] [3] : i32 into vector<8xi32>
14-
// CHECK: %[[LD_DESC_4:.*]] = vector.insert {{.*}}, %[[LD_DESC_3]] [4] : i32 into vector<8xi32>
15-
// CHECK: %[[LD_DESC:.*]] = vector.insert {{.*}}, %[[LD_DESC_4]] [5] : i32 into vector<8xi32>
1614
%src_tdesc = xegpu.create_nd_tdesc %srcce : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32,
17-
#xegpu.block_tdesc_attr<memory_space = global>, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
18-
19-
//CHECK: %[[LD_DESC_I64:.*]] = vector.bitcast %[[LD_DESC]] : vector<8xi32> to vector<4xi64>
20-
//CHECK: %[[PREF_INTPTR:.*]] = vector.extract %[[LD_DESC_I64]][0] : i64 from vector<4xi64>
21-
//CHECK: %[[PREF_BASE_W:.*]] = vector.extract %[[LD_DESC]][2] : i32 from vector<8xi32>
22-
//CHECK: %[[PREF_BASE_H:.*]] = vector.extract %[[LD_DESC]][3] : i32 from vector<8xi32>
23-
//CHECK: %[[PREF_TILE_W64:.*]] = arith.constant 0 : i64
24-
//CHECK: %[[PREF_TILE_W:.*]] = arith.trunci %[[PREF_TILE_W64]] : i64 to i32
25-
//CHECK: %[[PREF_TILE_H64:.*]] = arith.constant 0 : i64
26-
//CHECK: %[[PREF_TILE_H:.*]] = arith.trunci %[[PREF_TILE_H64]] : i64 to i32
27-
//CHECK: %[[PREF_LLVMPTR:.*]] = llvm.inttoptr %[[PREF_INTPTR]] : i64 to !llvm.ptr<1>
28-
//CHECK: %[[PREF_SIZEOF_F32:.*]] = arith.constant 4 : i32
29-
//CHECK: %[[PREF_BASE_ROW_IN_BYTES:.*]] = arith.muli %[[PREF_BASE_W]], %[[PREF_SIZEOF_F32]] : i32
30-
//CHECK: xevm.blockprefetch2d %[[PREF_LLVMPTR]], %[[PREF_BASE_ROW_IN_BYTES]], %[[PREF_BASE_H]],
31-
//CHECK-SAME: %[[PREF_BASE_ROW_IN_BYTES]], %[[PREF_TILE_W]], %[[PREF_TILE_H]]
32-
//CHECK-SAME: <{cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>, elem_size_in_bits = 32 : i32,
33-
//CHECK-SAME: tile_height = 8 : i32, tile_width = 16 : i32, v_blocks = 1 : i32}>
34-
//CHECK-SAME: : (!llvm.ptr<1>, i32, i32, i32, i32, i32)
15+
#xegpu.block_tdesc_attr<memory_space = global>>
16+
// CHECK: %[[C16_I64:.*]] = arith.constant 16 : i64
17+
// CHECK: %[[VAR1:.*]] = arith.trunci %[[C16_I64]] : i64 to i32
18+
// CHECK: %[[C8_I64:.*]] = arith.constant 8 : i64
19+
// CHECK: %[[VAR2:.*]] = arith.trunci %[[C8_I64]] : i64 to i32
20+
// CHECK: %[[C0_I64:.*]] = arith.constant 0 : i64
21+
// CHECK: %[[VAR3:.*]] = arith.trunci %[[C0_I64]] : i64 to i32
22+
// CHECK: %[[C0_I64_1:.*]] = arith.constant 0 : i64
23+
// CHECK: %[[VAR4:.*]] = arith.trunci %[[C0_I64_1]] : i64 to i32
24+
// CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR0]] : i64 to !llvm.ptr<1>
25+
// CHECK: %[[C4_I32:.*]] = arith.constant 4 : i32
26+
// CHECK: %[[VAR6:.*]] = arith.muli %[[VAR1]], %[[C4_I32]] : i32
27+
// CHECK: xevm.blockprefetch2d %[[VAR5]], %[[VAR6]], %[[VAR2]], %[[VAR6]], %[[VAR3]], %[[VAR4]]
28+
// CHECK-SAME: <{cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>, elem_size_in_bits = 32 : i32,
29+
// CHECK-SAME: tile_height = 8 : i32, tile_width = 16 : i32, v_blocks = 1 : i32}>
3530
xegpu.prefetch_nd %src_tdesc[0, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
36-
: !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<memory_space = global>,
37-
#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
31+
: !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<memory_space = global>>
3832

3933
gpu.return
4034
}

0 commit comments

Comments
 (0)