Skip to content

Commit 2546a37

Browse files
committed
Update test check.
1 parent 4e4cbd0 commit 2546a37

File tree

1 file changed

+23
-51
lines changed

1 file changed

+23
-51
lines changed

mlir/test/Conversion/XeGPUToXeVM/loadstore_nd.mlir

Lines changed: 23 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,45 @@
11
// RUN: mlir-opt -convert-xegpu-to-xevm -canonicalize %s | FileCheck %s
22

33
gpu.module @load_store_check {
4+
// CHECK-LABEL: gpu.func @load_store
5+
// CHECK-SAME: %[[ARG0:.*]]: memref<8x16xf32, 1>, %[[ARG1:.*]]: memref<8x16xf32, 1>
46
gpu.func @load_store(%src: memref<8x16xf32, 1>, %dst: memref<8x16xf32, 1>) kernel {
7+
// CHECK: %[[C64_i32:.*]] = arith.constant 64 : i32
8+
// CHECK: %[[C0_i32:.*]] = arith.constant 0 : i32
9+
// CHECK: %[[C8_i32:.*]] = arith.constant 8 : i32
10+
// CHECK: %[[MEMSPACECAST:.*]] = memref.memory_space_cast %[[ARG0]]
11+
// CHECK: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[MEMSPACECAST]] : memref<8x16xf32> -> index
12+
// CHECK: %[[VAR0:.*]] = arith.index_castui %[[INTPTR]] : index to i64
513
%srcce = memref.memory_space_cast %src : memref<8x16xf32, 1> to memref<8x16xf32>
14+
// CHECK: %[[MEMSPACECAST_0:.*]] = memref.memory_space_cast %[[ARG1]]
15+
// CHECK: %[[INTPTR_1:.*]] = memref.extract_aligned_pointer_as_index %[[MEMSPACECAST_0]] : memref<8x16xf32> -> index
16+
// CHECK: %[[VAR1:.*]] = arith.index_castui %[[INTPTR_1]] : index to i64
617
%dstte = memref.memory_space_cast %dst : memref<8x16xf32, 1> to memref<8x16xf32>
718

8-
// CHECK: %[[LD_PTR_AS_I64:.*]] = arith.index_castui {{.*}} : index to i64
9-
// CHECK: %[[LD_CREATE_DESC_I64:.*]] = vector.bitcast {{.*}} : vector<8xi32> to vector<4xi64>
10-
// CHECK: %[[LD_DESC_0:.*]] = vector.insert %[[LD_PTR_AS_I64]], %[[LD_CREATE_DESC_I64]] [0] : i64 into vector<4xi64>
11-
// CHECK: %[[LD_DESC_1:.*]] = vector.bitcast %[[LD_DESC_0]] : vector<4xi64> to vector<8xi32>
12-
// CHECK: %[[LD_DESC_2:.*]] = vector.insert {{.*}}, %[[LD_DESC_1]] [2] : i32 into vector<8xi32>
13-
// CHECK: %[[LD_DESC_3:.*]] = vector.insert {{.*}}, %[[LD_DESC_2]] [3] : i32 into vector<8xi32>
14-
// CHECK: %[[LD_DESC_4:.*]] = vector.insert {{.*}}, %[[LD_DESC_3]] [4] : i32 into vector<8xi32>
15-
// CHECK: %[[LD_DESC:.*]] = vector.insert {{.*}}, %[[LD_DESC_4]] [5] : i32 into vector<8xi32>
1619
%src_tdesc = xegpu.create_nd_tdesc %srcce : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
1720

18-
19-
//CHECK: %[[LD_DESC_I64:.*]] = vector.bitcast %[[LD_DESC]] : vector<8xi32> to vector<4xi64>
20-
//CHECK: %[[LD_INTPTR:.*]] = vector.extract %[[LD_DESC_I64]][0] : i64 from vector<4xi64>
21-
//CHECK: %[[LD_BASE_W:.*]] = vector.extract %[[LD_DESC]][2] : i32 from vector<8xi32>
22-
//CHECK: %[[LD_BASE_H:.*]] = vector.extract %[[LD_DESC]][3] : i32 from vector<8xi32>
23-
//CHECK: %[[LD_TILE_W64:.*]] = arith.constant 0 : i64
24-
//CHECK: %[[LD_TILE_W:.*]] = arith.trunci %[[LD_TILE_W64]] : i64 to i32
25-
//CHECK: %[[LD_TILE_H64:.*]] = arith.constant 0 : i64
26-
//CHECK: %[[LD_TILE_H:.*]] = arith.trunci %[[LD_TILE_H64]] : i64 to i32
27-
//CHECK: %[[LD_LLVMPTR:.*]] = llvm.inttoptr %[[LD_INTPTR]] : i64 to !llvm.ptr<1>
28-
//CHECK: %[[LD_SIZEOF_F32:.*]] = arith.constant 4 : i32
29-
//CHECK: %[[LD_BASE_ROW_IN_BYTES:.*]] = arith.muli %[[LD_BASE_W]], %[[LD_SIZEOF_F32]] : i32
30-
//CHECK: %[[LD_LOADED_I32:.*]] = xevm.blockload2d %[[LD_LLVMPTR]], %[[LD_BASE_ROW_IN_BYTES]],
31-
//CHECK-SAME: %[[LD_BASE_H]], %[[LD_BASE_ROW_IN_BYTES]], %[[LD_TILE_W]], %[[LD_TILE_H]]
32-
//CHECK-SAME: <{cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>, elem_size_in_bits = 32 : i32,
33-
//CHECK-SAME: pack_register = false, tile_height = 8 : i32, tile_width = 16 : i32, transpose = false,
34-
//CHECK-SAME: v_blocks = 1 : i32}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32>
21+
// CHECK: %[[VAR2:.*]] = llvm.inttoptr %[[VAR0]] : i64 to !llvm.ptr<1>
22+
// CHECK: %[[VAR3:.*]] = xevm.blockload2d %[[VAR2]], %[[C64_i32]], %[[C8_i32]], %[[C64_i32]],
23+
// CHECK-SAME: %[[C0_i32]], %[[C0_i32]] <{cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>,
24+
// CHECK-SAME: elem_size_in_bits = 32 : i32, pack_register = false, tile_height = 8 : i32,
25+
// CHECK-SAME: tile_width = 16 : i32, transpose = false, v_blocks = 1 : i32}>
26+
// CHECK: %[[VAR4:.*]] = vector.bitcast %[[VAR3]] : vector<8xi32> to vector<8xf32>
3527
%loaded = xegpu.load_nd %src_tdesc[0, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
3628
: !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
37-
//CHECK: %[[LD_LOADED_F32:.*]] = vector.bitcast %[[LD_LOADED_I32]] : vector<8xi32> to vector<8xf32>
3829

3930
%tid_x = gpu.thread_id x
4031
%tid_x_i32 = arith.index_cast %tid_x : index to i32
4132
%tid_x_f32 = arith.sitofp %tid_x_i32 : i32 to f32
42-
//CHECK: %[[LOADED_F32_MODIFIED:.*]] = vector.insert %{{.*}}, %[[LD_LOADED_F32]] [0] : f32 into vector<8xf32>
33+
// CHECK: %[[VAR7:.*]] = vector.insert
4334
%loaded_modified = vector.insert %tid_x_f32, %loaded[0] : f32 into vector<8xf32>
4435

45-
// CHECK: %[[PTR_AS_I64:.*]] = arith.index_castui {{.*}} : index to i64
46-
// CHECK: %[[CREATE_DESC_I64:.*]] = vector.bitcast {{.*}} : vector<8xi32> to vector<4xi64>
47-
// CHECK: %[[DESC_0:.*]] = vector.insert %[[PTR_AS_I64]], %[[CREATE_DESC_I64]] [0] : i64 into vector<4xi64>
48-
// CHECK: %[[DESC_1:.*]] = vector.bitcast %[[DESC_0]] : vector<4xi64> to vector<8xi32>
49-
// CHECK: %[[DESC_2:.*]] = vector.insert {{.*}}, %[[DESC_1]] [2] : i32 into vector<8xi32>
50-
// CHECK: %[[DESC_3:.*]] = vector.insert {{.*}}, %[[DESC_2]] [3] : i32 into vector<8xi32>
51-
// CHECK: %[[DESC_4:.*]] = vector.insert {{.*}}, %[[DESC_3]] [4] : i32 into vector<8xi32>
52-
// CHECK: %[[DESC:.*]] = vector.insert {{.*}}, %[[DESC_4]] [5] : i32 into vector<8xi32>
5336
%dst_tdesc = xegpu.create_nd_tdesc %dstte : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<memory_space = global>>
5437

55-
//CHECK: %[[DESC_I64:.*]] = vector.bitcast %[[DESC]] : vector<8xi32> to vector<4xi64>
56-
//CHECK: %[[INTPTR:.*]] = vector.extract %[[DESC_I64]][0] : i64 from vector<4xi64>
57-
//CHECK: %[[BASE_W:.*]] = vector.extract %[[DESC]][2] : i32 from vector<8xi32>
58-
//CHECK: %[[BASE_H:.*]] = vector.extract %[[DESC]][3] : i32 from vector<8xi32>
59-
//CHECK: %[[TILE_W64:.*]] = arith.constant 0 : i64
60-
//CHECK: %[[TILE_W:.*]] = arith.trunci %[[TILE_W64]] : i64 to i32
61-
//CHECK: %[[TILE_H64:.*]] = arith.constant 0 : i64
62-
//CHECK: %[[TILE_H:.*]] = arith.trunci %[[TILE_H64]] : i64 to i32
63-
//CHECK: %[[LLVMPTR:.*]] = llvm.inttoptr %[[INTPTR]] : i64 to !llvm.ptr<1>
64-
//CHECK: %[[SIZEOF_F32:.*]] = arith.constant 4 : i32
65-
//CHECK: %[[BASE_ROW_IN_BYTES:.*]] = arith.muli %[[BASE_W]], %[[SIZEOF_F32]] : i32
66-
//CHECK: %[[FLAT_VALUE_I32:.*]] = vector.bitcast %[[LOADED_F32_MODIFIED]] : vector<8xf32> to vector<8xi32>
67-
//CHECK: xevm.blockstore2d %[[LLVMPTR]], %[[BASE_ROW_IN_BYTES]], %[[BASE_H]], %[[BASE_ROW_IN_BYTES]],
68-
//CHECK-SAME: %[[TILE_W]], %[[TILE_H]], %[[FLAT_VALUE_I32]]
69-
//CHECK-SAME: <{cache_control = #xevm.store_cache_control<L1wb_L2uc_L3uc>, elem_size_in_bits = 32 : i32,
70-
//CHECK-SAME: tile_height = 8 : i32, tile_width = 16 : i32}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>)
38+
// CHECK: %[[VAR8:.*]] = llvm.inttoptr %[[VAR1]] : i64 to !llvm.ptr<1>
39+
// CHECK: %[[VAR9:.*]] = vector.bitcast %[[VAR7]] : vector<8xf32> to vector<8xi32>
40+
// CHECK: xevm.blockstore2d %[[VAR8]], %[[C64_i32]], %[[C8_i32]], %[[C64_i32]], %[[C0_i32]], %[[C0_i32]], %[[VAR9]]
41+
// CHECK-SAME: <{cache_control = #xevm.store_cache_control<L1wb_L2uc_L3uc>, elem_size_in_bits = 32 : i32,
42+
// CHECK-SAME: tile_height = 8 : i32, tile_width = 16 : i32}>
7143
xegpu.store_nd %loaded_modified, %dst_tdesc[0, 0] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>
7244
: vector<8xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<memory_space = global>>
7345
gpu.return

0 commit comments

Comments
 (0)