-// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8 atomic-store=false" --cse --split-input-file %s | FileCheck %s
+// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8 disable-atomic-rmw=true" --cse --split-input-file %s | FileCheck %s

 // TODO: remove memref.alloc() in the tests to eliminate noise.
 // memref.alloc exists here because sub-byte vector data types such as i2
 // are currently not supported as input arguments.

+///----------------------------------------------------------------------------------------
+/// vector.store
+///----------------------------------------------------------------------------------------
+
 func.func @vector_store_i2_const_index_two_partial_stores(%arg0: vector<3xi2>) {
   %0 = memref.alloc() : memref<3x3xi2>
   %c0 = arith.constant 0 : index
   %c2 = arith.constant 2 : index
   vector.store %arg0, %0[%c2, %c0] : memref<3x3xi2>, vector<3xi2>
   return
 }
-// In this example, emit two RMW stores without full-width store.
-// Store bit [12:18), byte [1:2] to a 3-byte vector, both bytes are
-// accessed partially.
+
+// Emit two non-atomic RMW partial stores: 6 bits from the input vector (bits [12:18))
+// are written into bytes [1:2] of a 3-byte output memref. Because the store covers
+// each byte only partially, both bytes are updated through masked RMW sequences.

 // CHECK: func @vector_store_i2_const_index_two_partial_stores(
 // CHECK-SAME: %[[ARG0:.+]]: vector<3xi2>)
@@ -28,10 +33,10 @@ func.func @vector_store_i2_const_index_two_partial_stores(%arg0: vector<3xi2>) {
 // CHECK: %[[INSERT:.+]] = vector.insert_strided_slice %[[EXTRACT]], %[[CST0]]
 // CHECK-SAME: {offsets = [2], strides = [1]} : vector<2xi2> into vector<4xi2>
 // CHECK: %[[LOAD:.+]] = vector.load
-// CHECK: %[[UPCAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi8> to vector<4xi2>
-// CHECK: %[[SELECT:.+]] = arith.select %[[CST]], %[[INSERT]], %[[UPCAST]]
-// CHECK: %[[DOWNCAST:.+]] = vector.bitcast %[[SELECT]]
-// CHECK: vector.store %[[DOWNCAST]], %[[ALLOC]][%[[C1]]]
+// CHECK: %[[DOWNCAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi8> to vector<4xi2>
+// CHECK: %[[SELECT:.+]] = arith.select %[[CST]], %[[INSERT]], %[[DOWNCAST]]
+// CHECK: %[[UPCAST:.+]] = vector.bitcast %[[SELECT]]
+// CHECK: vector.store %[[UPCAST]], %[[ALLOC]][%[[C1]]]

 // Part 2 RMW sequence
 // CHECK: %[[OFFSET:.+]] = arith.addi %[[C1]], %[[C1]] : index
@@ -90,11 +95,11 @@ func.func @vector_store_i2_two_partial_one_full_stores(%arg0: vector<7xi2>) {
 // CHECK: %[[INSERT2:.+]] = vector.insert_strided_slice %[[EXTRACT2]]
 // CHECK-SAME: {offsets = [0], strides = [1]}
 // CHECK: %[[CST1:.+]] = arith.constant dense<[true, true, false, false]>
-// CHECK: %[[LOAD1:.+]] = vector.load %[[ALLOC]][%[[INDEX2]]]
-// CHECK: %[[UPCAST1:.+]] = vector.bitcast %[[LOAD1]]
-// CHECK: %[[SELECT1:.+]] = arith.select %[[CST1]], %[[INSERT2]], %[[UPCAST1]]
-// CHECK: %[[DOWNCAST1:.+]] = vector.bitcast %[[SELECT1]]
-// CHECK: vector.store %[[DOWNCAST1]], %[[ALLOC]][%[[INDEX2]]]
+// CHECK: %[[LOAD2:.+]] = vector.load %[[ALLOC]][%[[INDEX2]]]
+// CHECK: %[[UPCAST2:.+]] = vector.bitcast %[[LOAD2]]
+// CHECK: %[[SELECT2:.+]] = arith.select %[[CST1]], %[[INSERT2]], %[[UPCAST2]]
+// CHECK: %[[DOWNCAST2:.+]] = vector.bitcast %[[SELECT2]]
+// CHECK: vector.store %[[DOWNCAST2]], %[[ALLOC]][%[[INDEX2]]]

 // -----

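For reference, here is a minimal hand-written sketch of the non-atomic RMW sequence the CHECK lines in the first test describe, for the first partially covered byte. It assumes the memref<3x3xi2> is emulated as memref<3xi8>; the function and value names (@rmw_partial_store_sketch, %old, %merged, etc.) are illustrative and not taken from the pass output.

// Sketch of one non-atomic RMW partial store: element (2, 0) of a 3x3xi2 memref
// linearizes to element 6, i.e. bit offset 12, which lands in byte 1 (bits [8:16)).
func.func @rmw_partial_store_sketch(%src: vector<3xi2>, %alloc: memref<3xi8>) {
  %c1 = arith.constant 1 : index
  // Byte 1 holds four i2 slots; the store overwrites the last two (bits [12:16)).
  %mask = arith.constant dense<[false, false, true, true]> : vector<4xi1>
  %zero = arith.constant dense<0> : vector<4xi2>
  // Place the first two source elements at slots 2 and 3 of the byte.
  %slice = vector.extract_strided_slice %src {offsets = [0], sizes = [2], strides = [1]}
      : vector<3xi2> to vector<2xi2>
  %insert = vector.insert_strided_slice %slice, %zero {offsets = [2], strides = [1]}
      : vector<2xi2> into vector<4xi2>
  // Read-modify-write: load the byte, merge under the mask, store it back.
  %old = vector.load %alloc[%c1] : memref<3xi8>, vector<1xi8>
  %oldbits = vector.bitcast %old : vector<1xi8> to vector<4xi2>
  %merged = arith.select %mask, %insert, %oldbits : vector<4xi1>, vector<4xi2>
  %newbyte = vector.bitcast %merged : vector<4xi2> to vector<1xi8>
  vector.store %newbyte, %alloc[%c1] : memref<3xi8>, vector<1xi8>
  return
}

The second byte follows the same pattern at index 2 (the %[[OFFSET]] computed by arith.addi above), with a mask covering only the slot holding bits [16:18). Without disable-atomic-rmw=true, each merge would instead be performed inside an atomic read-modify-write region (e.g. memref.generic_atomic_rmw) rather than as a plain load/select/store.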