88// CHECK: %[[N:.+]] = arith.floordivsi %[[IDX]], %[[C50176]]
99// CHECK-DAG: %[[P_REM:.+]] = arith.remsi %[[IDX]], %[[C50176]]
1010// CHECK-DAG: %[[P_NEG:.+]] = arith.cmpi slt, %[[P_REM]], %[[C0]]
11- // CHECK-DAG: %[[P_SHIFTED:.+]] = arith.addi %[[P_REM]], %[[C50176]]
11+ // CHECK-DAG: %[[P_SHIFTED:.+]] = arith.addi %[[P_REM]], %[[C50176]] overflow<nsw>
1212// CHECK-DAG: %[[P_MOD:.+]] = arith.select %[[P_NEG]], %[[P_SHIFTED]], %[[P_REM]]
1313// CHECK: %[[P:.+]] = arith.divsi %[[P_MOD]], %[[C224]]
1414// CHECK-DAG: %[[Q_REM:.+]] = arith.remsi %[[IDX]], %[[C224]]
1515// CHECK-DAG: %[[Q_NEG:.+]] = arith.cmpi slt, %[[Q_REM]], %[[C0]]
16- // CHECK-DAG: %[[Q_SHIFTED:.+]] = arith.addi %[[Q_REM]], %[[C224]]
16+ // CHECK-DAG: %[[Q_SHIFTED:.+]] = arith.addi %[[Q_REM]], %[[C224]] overflow<nsw>
1717// CHECK: %[[Q:.+]] = arith.select %[[Q_NEG]], %[[Q_SHIFTED]], %[[Q_REM]]
1818// CHECK: return %[[N]], %[[P]], %[[Q]]
1919func.func @delinearize_static_basis (%linear_index: index ) -> (index , index , index ) {
@@ -30,16 +30,16 @@ func.func @delinearize_static_basis(%linear_index: index) -> (index, index, inde
3030// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
3131// CHECK: %[[DIM1:.+]] = memref.dim %[[MEMREF]], %[[C1]] :
3232// CHECK: %[[DIM2:.+]] = memref.dim %[[MEMREF]], %[[C2]] :
33- // CHECK: %[[STRIDE1:.+]] = arith.muli %[[DIM2]], %[[DIM1]]
33+ // CHECK: %[[STRIDE1:.+]] = arith.muli %[[DIM2]], %[[DIM1]] overflow<nsw, nuw>
3434// CHECK: %[[N:.+]] = arith.floordivsi %[[IDX]], %[[STRIDE1]]
3535// CHECK-DAG: %[[P_REM:.+]] = arith.remsi %[[IDX]], %[[STRIDE1]]
3636// CHECK-DAG: %[[P_NEG:.+]] = arith.cmpi slt, %[[P_REM]], %[[C0]]
37- // CHECK-DAG: %[[P_SHIFTED:.+]] = arith.addi %[[P_REM]], %[[STRIDE1]]
37+ // CHECK-DAG: %[[P_SHIFTED:.+]] = arith.addi %[[P_REM]], %[[STRIDE1]] overflow<nsw>
3838// CHECK-DAG: %[[P_MOD:.+]] = arith.select %[[P_NEG]], %[[P_SHIFTED]], %[[P_REM]]
3939// CHECK: %[[P:.+]] = arith.divsi %[[P_MOD]], %[[DIM2]]
4040// CHECK-DAG: %[[Q_REM:.+]] = arith.remsi %[[IDX]], %[[DIM2]]
4141// CHECK-DAG: %[[Q_NEG:.+]] = arith.cmpi slt, %[[Q_REM]], %[[C0]]
42- // CHECK-DAG: %[[Q_SHIFTED:.+]] = arith.addi %[[Q_REM]], %[[DIM2]]
42+ // CHECK-DAG: %[[Q_SHIFTED:.+]] = arith.addi %[[Q_REM]], %[[DIM2]] overflow<nsw>
4343// CHECK: %[[Q:.+]] = arith.select %[[Q_NEG]], %[[Q_SHIFTED]], %[[Q_REM]]
4444// CHECK: return %[[N]], %[[P]], %[[Q]]
4545func.func @delinearize_dynamic_basis (%linear_index: index , %src: memref <?x?x?xf32 >) -> (index , index , index ) {
@@ -58,10 +58,10 @@ func.func @delinearize_dynamic_basis(%linear_index: index, %src: memref<?x?x?xf3
5858// CHECK-SAME: (%[[arg0:.+]]: index, %[[arg1:.+]]: index, %[[arg2:.+]]: index)
5959// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index
6060// CHECK-DAG: %[[C15:.+]] = arith.constant 15 : index
61- // CHECK: %[[scaled_0:.+]] = arith.muli %[[arg0]], %[[C15]]
62- // CHECK: %[[scaled_1:.+]] = arith.muli %[[arg1]], %[[C5]]
63- // CHECK: %[[val_0:.+]] = arith.addi %[[scaled_0]], %[[scaled_1]]
64- // CHECK: %[[val_1:.+]] = arith.addi %[[val_0]], %[[arg2]]
61+ // CHECK: %[[scaled_0:.+]] = arith.muli %[[arg0]], %[[C15]] overflow<nsw>
62+ // CHECK: %[[scaled_1:.+]] = arith.muli %[[arg1]], %[[C5]] overflow<nsw>
63+ // CHECK: %[[val_0:.+]] = arith.addi %[[scaled_0]], %[[scaled_1]] overflow<nsw>
64+ // CHECK: %[[val_1:.+]] = arith.addi %[[val_0]], %[[arg2]] overflow<nsw>
6565// CHECK: return %[[val_1]]
6666func.func @linearize_static (%arg0: index , %arg1: index , %arg2: index ) -> index {
6767 %0 = affine.linearize_index [%arg0 , %arg1 , %arg2 ] by (2 , 3 , 5 ) : index
@@ -72,11 +72,11 @@ func.func @linearize_static(%arg0: index, %arg1: index, %arg2: index) -> index {
7272
7373// CHECK-LABEL: @linearize_dynamic
7474// CHECK-SAME: (%[[arg0:.+]]: index, %[[arg1:.+]]: index, %[[arg2:.+]]: index, %[[arg3:.+]]: index, %[[arg4:.+]]: index)
75- // CHECK: %[[stride_0:.+]] = arith.muli %[[arg4]], %[[arg3]]
76- // CHECK: %[[scaled_0:.+]] = arith.muli %[[arg0]], %[[stride_0]]
77- // CHECK: %[[scaled_1:.+]] = arith.muli %[[arg1]], %[[arg4]]
78- // CHECK: %[[val_0:.+]] = arith.addi %[[scaled_0]], %[[scaled_1]]
79- // CHECK: %[[val_1:.+]] = arith.addi %[[val_0]], %[[arg2]]
75+ // CHECK: %[[stride_0:.+]] = arith.muli %[[arg4]], %[[arg3]] overflow<nsw>
76+ // CHECK: %[[scaled_0:.+]] = arith.muli %[[arg0]], %[[stride_0]] overflow<nsw>
77+ // CHECK: %[[scaled_1:.+]] = arith.muli %[[arg1]], %[[arg4]] overflow<nsw>
78+ // CHECK: %[[val_0:.+]] = arith.addi %[[scaled_0]], %[[scaled_1]] overflow<nsw>
79+ // CHECK: %[[val_1:.+]] = arith.addi %[[val_0]], %[[arg2]] overflow<nsw>
8080// CHECK: return %[[val_1]]
8181func.func @linearize_dynamic (%arg0: index , %arg1: index , %arg2: index , %arg3: index , %arg4: index ) -> index {
8282 // Note: no outer bounds
@@ -86,17 +86,33 @@ func.func @linearize_dynamic(%arg0: index, %arg1: index, %arg2: index, %arg3: in
8686
8787// -----
8888
89+ // CHECK-LABEL: @linearize_dynamic_disjoint
90+ // CHECK-SAME: (%[[arg0:.+]]: index, %[[arg1:.+]]: index, %[[arg2:.+]]: index, %[[arg3:.+]]: index, %[[arg4:.+]]: index)
91+ // CHECK: %[[stride_0:.+]] = arith.muli %[[arg4]], %[[arg3]] overflow<nsw, nuw>
92+ // CHECK: %[[scaled_0:.+]] = arith.muli %[[arg0]], %[[stride_0]] overflow<nsw>
93+ // CHECK: %[[scaled_1:.+]] = arith.muli %[[arg1]], %[[arg4]] overflow<nsw>
94+ // CHECK: %[[val_0:.+]] = arith.addi %[[scaled_0]], %[[scaled_1]] overflow<nsw>
95+ // CHECK: %[[val_1:.+]] = arith.addi %[[val_0]], %[[arg2]] overflow<nsw>
96+ // CHECK: return %[[val_1]]
97+ func.func @linearize_dynamic_disjoint (%arg0: index , %arg1: index , %arg2: index , %arg3: index , %arg4: index ) -> index {
98+ // Note: no outer bounds
99+ %0 = affine.linearize_index disjoint [%arg0 , %arg1 , %arg2 ] by (%arg3 , %arg4 ) : index
100+ func.return %0 : index
101+ }
102+
103+ // -----
104+
89105// CHECK-LABEL: @linearize_sort_adds
90106// CHECK-SAME: (%[[arg0:.+]]: memref<?xi32>, %[[arg1:.+]]: index, %[[arg2:.+]]: index)
91107// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
92108// CHECK: scf.for %[[arg3:.+]] = %{{.*}} to %[[arg2]] step %{{.*}} {
93109// CHECK: scf.for %[[arg4:.+]] = %{{.*}} to %[[C4]] step %{{.*}} {
94- // CHECK: %[[stride_0:.+]] = arith.muli %[[arg2]], %[[C4]]
95- // CHECK: %[[scaled_0:.+]] = arith.muli %[[arg1]], %[[stride_0]]
96- // CHECK: %[[scaled_1:.+]] = arith.muli %[[arg4]], %[[arg2]]
110+ // CHECK: %[[stride_0:.+]] = arith.muli %[[arg2]], %[[C4]] overflow<nsw, nuw>
111+ // CHECK: %[[scaled_0:.+]] = arith.muli %[[arg1]], %[[stride_0]] overflow<nsw>
112+ // CHECK: %[[scaled_1:.+]] = arith.muli %[[arg4]], %[[arg2]] overflow<nsw>
97113// Note: even though %arg3 has a lower stride, we add it first
98- // CHECK: %[[val_0_2:.+]] = arith.addi %[[scaled_0]], %[[arg3]]
99- // CHECK: %[[val_1:.+]] = arith.addi %[[val_0_2]], %[[scaled_1]]
114+ // CHECK: %[[val_0_2:.+]] = arith.addi %[[scaled_0]], %[[arg3]] overflow<nsw>
115+ // CHECK: %[[val_1:.+]] = arith.addi %[[val_0_2]], %[[scaled_1]] overflow<nsw>
100116// CHECK: memref.store %{{.*}}, %[[arg0]][%[[val_1]]]
101117func.func @linearize_sort_adds (%arg0: memref <?xi32 >, %arg1: index , %arg2: index ) {
102118 %c0 = arith.constant 0 : index
0 commit comments