-// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8 atomic-store=false" --cse --split-input-file %s | FileCheck %s
+// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8 disable-atomic-rmw=true" --cse --split-input-file %s | FileCheck %s

 // TODO: remove memref.alloc() in the tests to eliminate noise.
 // memref.alloc exists here because sub-byte vector data types such as i2
 // are currently not supported as input arguments.

+///----------------------------------------------------------------------------------------
+/// vector.store
+///----------------------------------------------------------------------------------------
+
 func.func @vector_store_i2_const_index_two_partial_stores(%arg0: vector<3xi2>) {
   %0 = memref.alloc() : memref<3x3xi2>
   %c0 = arith.constant 0 : index
   %c2 = arith.constant 2 : index
   vector.store %arg0, %0[%c2, %c0] : memref<3x3xi2>, vector<3xi2>
   return
 }
-// In this example, emit two RMW stores without full-width store.
-// Store bit [12:18), byte [1:2] to a 3-byte vector, both bytes are
-// accessed partially.
+
+// Emit two non-atomic RMW partial stores: 6 bits from the input vector (bits [12:18))
+// are written into bytes [1:2] of a 3-byte output memref. Because the store covers
+// each byte only partially, both bytes are updated through masked RMW sequences.

 // CHECK: func @vector_store_i2_const_index_two_partial_stores(
 // CHECK-SAME: %[[ARG0:.+]]: vector<3xi2>)
@@ -28,10 +33,10 @@ func.func @vector_store_i2_const_index_two_partial_stores(%arg0: vector<3xi2>) {
 // CHECK: %[[INSERT:.+]] = vector.insert_strided_slice %[[EXTRACT]], %[[CST0]]
 // CHECK-SAME: {offsets = [2], strides = [1]} : vector<2xi2> into vector<4xi2>
 // CHECK: %[[LOAD:.+]] = vector.load
-// CHECK: %[[UPCAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi8> to vector<4xi2>
-// CHECK: %[[SELECT:.+]] = arith.select %[[CST]], %[[INSERT]], %[[UPCAST]]
-// CHECK: %[[DOWNCAST:.+]] = vector.bitcast %[[SELECT]]
-// CHECK: vector.store %[[DOWNCAST]], %[[ALLOC]][%[[C1]]]
+// CHECK: %[[DOWNCAST:.+]] = vector.bitcast %[[LOAD]] : vector<1xi8> to vector<4xi2>
+// CHECK: %[[SELECT:.+]] = arith.select %[[CST]], %[[INSERT]], %[[DOWNCAST]]
+// CHECK: %[[UPCAST:.+]] = vector.bitcast %[[SELECT]]
+// CHECK: vector.store %[[UPCAST]], %[[ALLOC]][%[[C1]]]

 // Part 2 RMW sequence
 // CHECK: %[[OFFSET:.+]] = arith.addi %[[C1]], %[[C1]] : index
@@ -90,11 +95,11 @@ func.func @vector_store_i2_two_partial_one_full_stores(%arg0: vector<7xi2>) {
 // CHECK: %[[INSERT2:.+]] = vector.insert_strided_slice %[[EXTRACT2]]
 // CHECK-SAME: {offsets = [0], strides = [1]}
 // CHECK: %[[CST1:.+]] = arith.constant dense<[true, true, false, false]>
-// CHECK: %[[LOAD1:.+]] = vector.load %[[ALLOC]][%[[INDEX2]]]
-// CHECK: %[[UPCAST1:.+]] = vector.bitcast %[[LOAD1]]
-// CHECK: %[[SELECT1:.+]] = arith.select %[[CST1]], %[[INSERT2]], %[[UPCAST1]]
-// CHECK: %[[DOWNCAST1:.+]] = vector.bitcast %[[SELECT1]]
-// CHECK: vector.store %[[DOWNCAST1]], %[[ALLOC]][%[[INDEX2]]]
+// CHECK: %[[LOAD2:.+]] = vector.load %[[ALLOC]][%[[INDEX2]]]
+// CHECK: %[[UPCAST2:.+]] = vector.bitcast %[[LOAD2]]
+// CHECK: %[[SELECT2:.+]] = arith.select %[[CST1]], %[[INSERT2]], %[[UPCAST2]]
+// CHECK: %[[DOWNCAST2:.+]] = vector.bitcast %[[SELECT2]]
+// CHECK: vector.store %[[DOWNCAST2]], %[[ALLOC]][%[[INDEX2]]]

 // -----

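For reference, here is a minimal hand-written sketch of the non-atomic RMW sequence the CHECK lines in the first test describe, for the first partially covered byte. It assumes the memref<3x3xi2> is emulated as memref<3xi8>; the function and value names (@rmw_partial_store_sketch, %old, %merged, etc.) are illustrative and not taken from the pass output.

// Sketch of one non-atomic RMW partial store: element (2, 0) of a 3x3xi2 memref
// linearizes to element 6, i.e. bit offset 12, which lands in byte 1 (bits [8:16)).
func.func @rmw_partial_store_sketch(%src: vector<3xi2>, %alloc: memref<3xi8>) {
  %c1 = arith.constant 1 : index
  // Byte 1 holds four i2 slots; the store overwrites the last two (bits [12:16)).
  %mask = arith.constant dense<[false, false, true, true]> : vector<4xi1>
  %zero = arith.constant dense<0> : vector<4xi2>
  // Place the first two source elements at slots 2 and 3 of the byte.
  %slice = vector.extract_strided_slice %src {offsets = [0], sizes = [2], strides = [1]}
      : vector<3xi2> to vector<2xi2>
  %insert = vector.insert_strided_slice %slice, %zero {offsets = [2], strides = [1]}
      : vector<2xi2> into vector<4xi2>
  // Read-modify-write: load the byte, merge under the mask, store it back.
  %old = vector.load %alloc[%c1] : memref<3xi8>, vector<1xi8>
  %oldbits = vector.bitcast %old : vector<1xi8> to vector<4xi2>
  %merged = arith.select %mask, %insert, %oldbits : vector<4xi1>, vector<4xi2>
  %newbyte = vector.bitcast %merged : vector<4xi2> to vector<1xi8>
  vector.store %newbyte, %alloc[%c1] : memref<3xi8>, vector<1xi8>
  return
}

The second byte follows the same pattern at index 2 (the %[[OFFSET]] computed by arith.addi above), with a mask covering only the slot holding bits [16:18). Without disable-atomic-rmw=true, each merge would instead be performed inside an atomic read-modify-write region (e.g. memref.generic_atomic_rmw) rather than as a plain load/select/store.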