Skip to content

Commit b63c9fe

Browse files
committed
update according to comments
1 parent c9e2754 commit b63c9fe

File tree

3 files changed

+22
-18
lines changed

3 files changed

+22
-18
lines changed

mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1969,7 +1969,7 @@ void vector::populateVectorNarrowTypeEmulationPatterns(
19691969
typeConverter, patterns.getContext());
19701970

19711971
// Populate `vector.*` store conversion patterns. The caller can choose
1972-
// to avoid emitting atomic operations and reduce it to load-modify-write
1972+
// to avoid emitting atomic operations and reduce it to a read-modify-write
19731973
// sequence for stores if it is known that there is no thread contention.
19741974
patterns.insert<ConvertVectorStore>(patterns.getContext(), disableAtomicRMW);
19751975
}

mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned-non-atomic.mlir

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
1-
// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8 atomic-store=false" --cse --split-input-file %s | FileCheck %s
1+
// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8 disable-atomic-rmw=true" --cse --split-input-file %s | FileCheck %s
22

33
// TODO: remove memref.alloc() in the tests to eliminate noise.
44
// memref.alloc exists here because sub-byte vector data types such as i2
55
// are currently not supported as input arguments.
66

7+
///----------------------------------------------------------------------------------------
8+
/// vector.store
9+
///----------------------------------------------------------------------------------------
10+
711
func.func @vector_store_i2_const_index_two_partial_stores(%arg0: vector<3xi2>) {
812
%0 = memref.alloc() : memref<3x3xi2>
913
%c0 = arith.constant 0 : index
1014
%c2 = arith.constant 2 : index
1115
vector.store %arg0, %0[%c2, %c0] :memref<3x3xi2>, vector<3xi2>
1216
return
1317
}
14-
// In this example, emit two RMW stores without full-width store.
15-
// Store bit [12:18), byte [1:2] to a 3-byte vector, both bytes are
16-
// accessed partially.
18+
19+
// Emit two non-atomic RMW partial stores. Store the 6 bits of the input vector
20+
// into bits [12:18) of the 3-byte output memref, i.e. bytes [1:2]. Due to partial storing,
21+
// both bytes are accessed partially through masking.
1722

1823
// CHECK: func @vector_store_i2_const_index_two_partial_stores(
1924
// CHECK-SAME: %[[ARG0:.+]]: vector<3xi2>)
@@ -28,10 +33,10 @@ func.func @vector_store_i2_const_index_two_partial_stores(%arg0: vector<3xi2>) {
2833
// CHECK: %[[INSERT:.+]] = vector.insert_strided_slice %[[EXTRACT]], %[[CST0]]
2934
// CHECK-SAME: {offsets = [2], strides = [1]} : vector<2xi2> into vector<4xi2>
3035
// CHECK: %[[LOAD:.+]] = vector.load
31-
// CHECK: %[[UPCAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi8> to vector<4xi2>
32-
// CHECK: %[[SELECT:.+]] = arith.select %[[CST]], %[[INSERT]], %[[UPCAST]]
33-
// CHECK: %[[DOWNCAST:.+]] = vector.bitcast %[[SELECT]]
34-
// CHECK: vector.store %[[DOWNCAST]], %[[ALLOC]][%[[C1]]]
36+
// CHECK: %[[DOWNCAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi8> to vector<4xi2>
37+
// CHECK: %[[SELECT:.+]] = arith.select %[[CST]], %[[INSERT]], %[[DOWNCAST]]
38+
// CHECK: %[[UPCAST:.+]] = vector.bitcast %[[SELECT]]
39+
// CHECK: vector.store %[[UPCAST]], %[[ALLOC]][%[[C1]]]
3540

3641
// Part 2 RMW sequence
3742
// CHECK: %[[OFFSET:.+]] = arith.addi %[[C1]], %[[C1]] : index
@@ -90,11 +95,11 @@ func.func @vector_store_i2_two_partial_one_full_stores(%arg0: vector<7xi2>) {
9095
// CHECK: %[[INSERT2:.+]] = vector.insert_strided_slice %[[EXTRACT2]]
9196
// CHECK-SAME: {offsets = [0], strides = [1]}
9297
// CHECK: %[[CST1:.+]] = arith.constant dense<[true, true, false, false]>
93-
// CHECK: %[[LOAD1:.+]] = vector.load %[[ALLOC]][%[[INDEX2]]]
94-
// CHECK: %[[UPCAST1:.+]] = vector.bitcast %[[LOAD1]]
95-
// CHECK: %[[SELECT1:.+]] = arith.select %[[CST1]], %[[INSERT2]], %[[UPCAST1]]
96-
// CHECK: %[[DOWNCAST1:.+]] = vector.bitcast %[[SELECT1]]
97-
// CHECK: vector.store %[[DOWNCAST1]], %[[ALLOC]][%[[INDEX2]]]
98+
// CHECK: %[[LOAD2:.+]] = vector.load %[[ALLOC]][%[[INDEX2]]]
99+
// CHECK: %[[UPCAST2:.+]] = vector.bitcast %[[LOAD2]]
100+
// CHECK: %[[SELECT2:.+]] = arith.select %[[CST1]], %[[INSERT2]], %[[UPCAST2]]
101+
// CHECK: %[[DOWNCAST2:.+]] = vector.bitcast %[[SELECT2]]
102+
// CHECK: vector.store %[[DOWNCAST2]], %[[ALLOC]][%[[INDEX2]]]
98103

99104
// -----
100105

mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -369,10 +369,9 @@ func.func @vector_store_i2_const_index_two_partial_stores(%arg0: vector<3xi2>) {
369369
return
370370
}
371371

372-
// In this example, emit 2 atomic RMWs.
373-
//
374-
// Note, sizeof(%src) = 18 bits. This is modelled as %src_as_bytes:
375-
// <3xi8> (bits [0, 18) with the input values from %src, and [18, 24) are masked out)
372+
// Emit two atomic RMW partial stores. Store the 6 bits of the input vector
373+
// into bits [12:18) of the 3-byte output memref, i.e. bytes [1:2]. Due to partial storing,
374+
// both bytes are accessed partially through masking.
376375

377376
// CHECK-LABEL: func @vector_store_i2_const_index_two_partial_stores(
378377
// CHECK-SAME: %[[ARG0:.+]]: vector<3xi2>)

0 commit comments

Comments
 (0)