Skip to content

Commit b2c9ffa

Browse files
committed
Add missing file.
1 parent fd3186e commit b2c9ffa

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: mlir-opt -convert-xegpu-to-xevm -canonicalize %s | FileCheck %s
2+
3+
gpu.module @load_store_check {
4+
// CHECK-LABEL: @load_store(
5+
// CHECK-SAME: %[[SRC:.*]]: memref<8x64xf32, 1>, %[[DST:.*]]: memref<8x32xf32, 1>
6+
gpu.func @load_store(%src: memref<8x64xf32, 1>, %dst: memref<8x32xf32, 1>) kernel {
7+
// CHECK: %[[C512:.*]] = arith.constant 512 : i64
8+
// CHECK: %[[C32:.*]] = arith.constant 32 : i32
9+
// CHECK: %[[C384:.*]] = arith.constant 384 : i64
10+
// CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<4xi64>
11+
// CHECK: %[[C8:.*]] = arith.constant 8 : i32
12+
// CHECK: %[[C64:.*]] = arith.constant 64 : i32
13+
// CHECK: %[[C0:.*]] = arith.constant 0 : i32
14+
15+
// CHECK: %[[SRCCE:.*]] = memref.memory_space_cast %[[SRC]] : memref<8x64xf32, 1> to memref<8x64xf32>
16+
%srcce = memref.memory_space_cast %src : memref<8x64xf32, 1> to memref<8x64xf32>
17+
// CHECK: %[[DSTTE:.*]] = memref.memory_space_cast %[[DST]] : memref<8x32xf32, 1> to memref<8x32xf32>
18+
%dstte = memref.memory_space_cast %dst : memref<8x32xf32, 1> to memref<8x32xf32>
19+
20+
// CHECK: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[SRCCE]] : memref<8x64xf32> -> index
21+
// CHECK: %[[INTPTR_I64:.*]] = arith.index_castui %[[INTPTR]] : index to i64
22+
// CHECK: %[[VEC1:.*]] = vector.insert %[[INTPTR_I64]], %[[CST]] [0] : i64 into vector<4xi64>
23+
// CHECK: %[[VEC2:.*]] = vector.bitcast %[[VEC1]] : vector<4xi64> to vector<8xi32>
24+
// CHECK: %[[VEC3:.*]] = vector.insert %[[C64]], %[[VEC2]] [2] : i32 into vector<8xi32>
25+
// CHECK: %[[VEC4:.*]] = vector.insert %[[C8]], %[[VEC3]] [3] : i32 into vector<8xi32>
26+
// CHECK: %[[VEC5:.*]] = vector.insert %[[C0]], %[[VEC4]] [4] : i32 into vector<8xi32>
27+
// CHECK: %[[VEC6:.*]] = vector.insert %[[C0]], %[[VEC5]] [5] : i32 into vector<8xi32>
28+
%src_tdesc = xegpu.create_nd_tdesc %srcce : memref<8x64xf32> -> !xegpu.tensor_desc<32xf32>
29+
// CHECK: %[[VEC7:.*]] = vector.bitcast %[[VEC6]] : vector<8xi32> to vector<4xi64>
30+
// CHECK: %[[EXTR:.*]] = vector.extract %[[VEC7]][0] : i64 from vector<4xi64>
31+
// CHECK: %[[ADDR:.*]] = arith.addi %[[EXTR]], %[[C384]] : i64
32+
// CHECK: %[[PTR:.*]] = llvm.inttoptr %[[ADDR]] : i64 to !llvm.ptr<1>
33+
// CHECK: %[[LOAD:.*]] = xevm.blockload %[[PTR]] <{cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>}>
34+
// CHECK-SAME: : (!llvm.ptr<1>) -> vector<2xi32>
35+
%loaded = xegpu.load_nd %src_tdesc[1, 32] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
36+
: !xegpu.tensor_desc<32xf32> -> vector<2xf32>
37+
38+
// CHECK: %[[INTPTR1:.*]] = memref.extract_aligned_pointer_as_index %[[DSTTE]] : memref<8x32xf32> -> index
39+
// CHECK: %[[INTPTR1_I64:.*]] = arith.index_castui %[[INTPTR1]] : index to i64
40+
// CHECK: %[[VEC1_1:.*]] = vector.insert %[[INTPTR1_I64]], %[[CST]] [0] : i64 into vector<4xi64>
41+
// CHECK: %[[VEC2_1:.*]] = vector.bitcast %[[VEC1_1]] : vector<4xi64> to vector<8xi32>
42+
// CHECK: %[[VEC3_1:.*]] = vector.insert %[[C32]], %[[VEC2_1]] [2] : i32 into vector<8xi32>
43+
// CHECK: %[[VEC4_1:.*]] = vector.insert %[[C8]], %[[VEC3_1]] [3] : i32 into vector<8xi32>
44+
// CHECK: %[[VEC5_1:.*]] = vector.insert %[[C0]], %[[VEC4_1]] [4] : i32 into vector<8xi32>
45+
// CHECK: %[[VEC6_1:.*]] = vector.insert %[[C0]], %[[VEC5_1]] [5] : i32 into vector<8xi32>
46+
%dst_tdesc = xegpu.create_nd_tdesc %dstte : memref<8x32xf32> -> !xegpu.tensor_desc<32xf32, #xegpu.block_tdesc_attr<memory_space = global>>
47+
// CHECK: %[[VEC7_1:.*]] = vector.bitcast %[[VEC6_1]] : vector<8xi32> to vector<4xi64>
48+
// CHECK: %[[EXTR1:.*]] = vector.extract %[[VEC7_1]][0] : i64 from vector<4xi64>
49+
// CHECK: %[[ADDR1:.*]] = arith.addi %[[EXTR1]], %[[C512]] : i64
50+
// CHECK: %[[PTR1:.*]] = llvm.inttoptr %[[ADDR1]] : i64 to !llvm.ptr<1>
51+
// CHECK: xevm.blockstore %[[PTR1]], %[[LOAD]] <{cache_control = #xevm.store_cache_control<L1wb_L2uc_L3uc>}>
52+
// CHECK-SAME: : (!llvm.ptr<1>, vector<2xi32>)
53+
xegpu.store_nd %loaded, %dst_tdesc[4, 0] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>
54+
: vector<2xf32>, !xegpu.tensor_desc<32xf32, #xegpu.block_tdesc_attr<memory_space = global>>
55+
gpu.return
56+
}
57+
}

0 commit comments

Comments
 (0)