|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
2 | 2 | ; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=none < %s | FileCheck %s --check-prefix=NONE |
3 | 3 | ; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data < %s | FileCheck %s --check-prefix=DATA |
4 | | -; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-without-lane-mask < %s | FileCheck %s --check-prefix=DATA_NO_LANEMASK |
5 | 4 | ; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-and-control < %s | FileCheck %s --check-prefix=DATA_AND_CONTROL |
6 | 5 | ; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-and-control-without-rt-check < %s | FileCheck %s --check-prefix=DATA_AND_CONTROL_NO_RT_CHECK |
7 | 6 |
|
@@ -99,61 +98,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features |
99 | 98 | ; DATA: while.end.loopexit: |
100 | 99 | ; DATA-NEXT: ret void |
101 | 100 | ; |
102 | | -; DATA_NO_LANEMASK-LABEL: @simple_memset_tailfold( |
103 | | -; DATA_NO_LANEMASK-NEXT: entry: |
104 | | -; DATA_NO_LANEMASK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) |
105 | | -; DATA_NO_LANEMASK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]] |
106 | | -; DATA_NO_LANEMASK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() |
107 | | -; DATA_NO_LANEMASK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 |
108 | | -; DATA_NO_LANEMASK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] |
109 | | -; DATA_NO_LANEMASK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
110 | | -; DATA_NO_LANEMASK: vector.ph: |
111 | | -; DATA_NO_LANEMASK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() |
112 | | -; DATA_NO_LANEMASK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 |
113 | | -; DATA_NO_LANEMASK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() |
114 | | -; DATA_NO_LANEMASK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 |
115 | | -; DATA_NO_LANEMASK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 |
116 | | -; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] |
117 | | -; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] |
118 | | -; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] |
119 | | -; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1 |
120 | | -; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() |
121 | | -; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 |
122 | | -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 |
123 | | -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer |
124 | | -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0 |
125 | | -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
126 | | -; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]] |
127 | | -; DATA_NO_LANEMASK: vector.body: |
128 | | -; DATA_NO_LANEMASK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY]] ] |
129 | | -; DATA_NO_LANEMASK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0 |
130 | | -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[INDEX1]], i64 0 |
131 | | -; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer |
132 | | -; DATA_NO_LANEMASK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64() |
133 | | -; DATA_NO_LANEMASK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]] |
134 | | -; DATA_NO_LANEMASK-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT3]], [[TMP11]] |
135 | | -; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] |
136 | | -; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] |
137 | | -; DATA_NO_LANEMASK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 |
138 | | -; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[TMP12]]) |
139 | | -; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP16]] |
140 | | -; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] |
141 | | -; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
142 | | -; DATA_NO_LANEMASK: middle.block: |
143 | | -; DATA_NO_LANEMASK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] |
144 | | -; DATA_NO_LANEMASK: scalar.ph: |
145 | | -; DATA_NO_LANEMASK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] |
146 | | -; DATA_NO_LANEMASK-NEXT: br label [[WHILE_BODY:%.*]] |
147 | | -; DATA_NO_LANEMASK: while.body: |
148 | | -; DATA_NO_LANEMASK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] |
149 | | -; DATA_NO_LANEMASK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] |
150 | | -; DATA_NO_LANEMASK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 |
151 | | -; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 |
152 | | -; DATA_NO_LANEMASK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] |
153 | | -; DATA_NO_LANEMASK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] |
154 | | -; DATA_NO_LANEMASK: while.end.loopexit: |
155 | | -; DATA_NO_LANEMASK-NEXT: ret void |
156 | | -; |
157 | 101 | ; DATA_AND_CONTROL-LABEL: @simple_memset_tailfold( |
158 | 102 | ; DATA_AND_CONTROL-NEXT: entry: |
159 | 103 | ; DATA_AND_CONTROL-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) |
|
0 commit comments