@@ -11,27 +11,27 @@ define void @cost_store_i8(ptr %dst) #0 {
1111; DEFAULT-NEXT: iter.check:
1212; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
1313; DEFAULT: vector.main.loop.iter.check:
14- ; DEFAULT-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
15- ; DEFAULT-NEXT: [[TMP3 :%.*]] = shl nuw i64 [[TMP2 ]], 5
16- ; DEFAULT-NEXT: [[MIN_ITERS_CHECK1 :%.*]] = icmp ult i64 101, [[TMP3 ]]
17- ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1 ]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
14+ ; DEFAULT-NEXT: [[TMP0 :%.*]] = call i64 @llvm.vscale.i64()
15+ ; DEFAULT-NEXT: [[TMP1 :%.*]] = shl nuw i64 [[TMP0 ]], 5
16+ ; DEFAULT-NEXT: [[MIN_ITERS_CHECK :%.*]] = icmp ult i64 101, [[TMP1 ]]
17+ ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK ]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
1818; DEFAULT: vector.ph:
19- ; DEFAULT-NEXT: [[TMP4 :%.*]] = call i64 @llvm.vscale.i64()
20- ; DEFAULT-NEXT: [[TMP5 :%.*]] = mul nuw i64 [[TMP4 ]], 32
21- ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP5 ]]
19+ ; DEFAULT-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
20+ ; DEFAULT-NEXT: [[TMP3 :%.*]] = mul nuw i64 [[TMP2 ]], 32
21+ ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3 ]]
2222; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]]
2323; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
2424; DEFAULT: vector.body:
2525; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
26- ; DEFAULT-NEXT: [[TMP9 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
27- ; DEFAULT-NEXT: [[TMP22 :%.*]] = call i64 @llvm.vscale.i64()
28- ; DEFAULT-NEXT: [[TMP23 :%.*]] = shl nuw i64 [[TMP22 ]], 4
29- ; DEFAULT-NEXT: [[TMP24 :%.*]] = getelementptr i8, ptr [[TMP9 ]], i64 [[TMP23 ]]
30- ; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP9 ]], align 1
31- ; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP24 ]], align 1
32- ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5 ]]
33- ; DEFAULT-NEXT: [[TMP11 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
34- ; DEFAULT-NEXT: br i1 [[TMP11 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
26+ ; DEFAULT-NEXT: [[TMP4 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
27+ ; DEFAULT-NEXT: [[TMP5 :%.*]] = call i64 @llvm.vscale.i64()
28+ ; DEFAULT-NEXT: [[TMP6 :%.*]] = shl nuw i64 [[TMP5 ]], 4
29+ ; DEFAULT-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[TMP4 ]], i64 [[TMP6 ]]
30+ ; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP4 ]], align 1
31+ ; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP7 ]], align 1
32+ ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3 ]]
33+ ; DEFAULT-NEXT: [[TMP8 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
34+ ; DEFAULT-NEXT: br i1 [[TMP8 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3535; DEFAULT: middle.block:
3636; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]]
3737; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -42,11 +42,11 @@ define void @cost_store_i8(ptr %dst) #0 {
4242; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
4343; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
4444; DEFAULT: vec.epilog.vector.body:
45- ; DEFAULT-NEXT: [[INDEX5 :%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
46- ; DEFAULT-NEXT: [[TMP19 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5 ]]
47- ; DEFAULT-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP19 ]], align 1
48- ; DEFAULT-NEXT: [[INDEX_NEXT6 ]] = add nuw i64 [[INDEX5 ]], 8
49- ; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT6 ]], 96
45+ ; DEFAULT-NEXT: [[INDEX1 :%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
46+ ; DEFAULT-NEXT: [[TMP9 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1 ]]
47+ ; DEFAULT-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP9 ]], align 1
48+ ; DEFAULT-NEXT: [[INDEX_NEXT2 ]] = add nuw i64 [[INDEX1 ]], 8
49+ ; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2 ]], 96
5050; DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
5151; DEFAULT: vec.epilog.middle.block:
5252; DEFAULT-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -70,25 +70,25 @@ define void @cost_store_i8(ptr %dst) #0 {
7070; PRED: vector.ph:
7171; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
7272; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
73- ; PRED-NEXT: [[TMP7 :%.*]] = call i64 @llvm.vscale.i64()
74- ; PRED-NEXT: [[TMP8 :%.*]] = shl nuw i64 [[TMP7 ]], 4
75- ; PRED-NEXT: [[TMP9 :%.*]] = sub i64 101, [[TMP8 ]]
76- ; PRED-NEXT: [[TMP10 :%.*]] = icmp ugt i64 101, [[TMP8 ]]
77- ; PRED-NEXT: [[TMP11 :%.*]] = select i1 [[TMP10 ]], i64 [[TMP9 ]], i64 0
73+ ; PRED-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
74+ ; PRED-NEXT: [[TMP3 :%.*]] = shl nuw i64 [[TMP2 ]], 4
75+ ; PRED-NEXT: [[TMP4 :%.*]] = sub i64 101, [[TMP3 ]]
76+ ; PRED-NEXT: [[TMP5 :%.*]] = icmp ugt i64 101, [[TMP3 ]]
77+ ; PRED-NEXT: [[TMP6 :%.*]] = select i1 [[TMP5 ]], i64 [[TMP4 ]], i64 0
7878; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 101)
7979; PRED-NEXT: br label [[VECTOR_BODY:%.*]]
8080; PRED: vector.body:
8181; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
8282; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
83- ; PRED-NEXT: [[TMP13 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
84- ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr align 1 [[TMP13 ]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
83+ ; PRED-NEXT: [[TMP7 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
84+ ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr align 1 [[TMP7 ]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
8585; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
86- ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11 ]])
87- ; PRED-NEXT: [[TMP14 :%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
88- ; PRED-NEXT: [[TMP12 :%.*]] = xor i1 [[TMP14 ]], true
89- ; PRED-NEXT: br i1 [[TMP12 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
86+ ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP6 ]])
87+ ; PRED-NEXT: [[TMP8 :%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
88+ ; PRED-NEXT: [[TMP9 :%.*]] = xor i1 [[TMP8 ]], true
89+ ; PRED-NEXT: br i1 [[TMP9 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
9090; PRED: middle.block:
91- ; PRED-NEXT: br label [[LOOP :%.*]]
91+ ; PRED-NEXT: br label [[EXIT :%.*]]
9292; PRED: exit:
9393; PRED-NEXT: ret void
9494;
@@ -113,33 +113,33 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
113113; DEFAULT-NEXT: iter.check:
114114; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
115115; DEFAULT: vector.memcheck:
116- ; DEFAULT-NEXT: [[SCEVGEP1 :%.*]] = getelementptr i8, ptr [[DST]], i64 1000
117- ; DEFAULT-NEXT: [[SCEVGEP :%.*]] = getelementptr i8, ptr [[SRC]], i64 8
118- ; DEFAULT-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP ]]
119- ; DEFAULT-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1 ]]
116+ ; DEFAULT-NEXT: [[SCEVGEP :%.*]] = getelementptr i8, ptr [[DST]], i64 1000
117+ ; DEFAULT-NEXT: [[SCEVGEP1 :%.*]] = getelementptr i8, ptr [[SRC]], i64 8
118+ ; DEFAULT-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1 ]]
119+ ; DEFAULT-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP ]]
120120; DEFAULT-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
121121; DEFAULT-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
122122; DEFAULT: vector.main.loop.iter.check:
123123; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
124124; DEFAULT: vector.ph:
125- ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3 :%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
126- ; DEFAULT-NEXT: [[BROADCAST_SPLAT4 :%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT3 ]], <16 x i16> poison, <16 x i32> zeroinitializer
127- ; DEFAULT-NEXT: [[TMP7 :%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT4 ]] to <16 x i8>
125+ ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT :%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
126+ ; DEFAULT-NEXT: [[BROADCAST_SPLAT :%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT ]], <16 x i16> poison, <16 x i32> zeroinitializer
127+ ; DEFAULT-NEXT: [[TMP0 :%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT ]] to <16 x i8>
128128; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
129129; DEFAULT: vector.body:
130130; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
131- ; DEFAULT-NEXT: [[TMP4 :%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
132- ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1 :%.*]] = insertelement <16 x i64> poison, i64 [[TMP4 ]], i64 0
133- ; DEFAULT-NEXT: [[BROADCAST_SPLAT2 :%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1 ]], <16 x i64> poison, <16 x i32> zeroinitializer
134- ; DEFAULT-NEXT: [[TMP5 :%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT2 ]] to <16 x i8>
135- ; DEFAULT-NEXT: [[TMP9 :%.*]] = and <16 x i8> [[TMP5 ]], [[TMP7 ]]
136- ; DEFAULT-NEXT: [[TMP10 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
137- ; DEFAULT-NEXT: [[TMP13 :%.*]] = getelementptr i8, ptr [[TMP10 ]], i32 16
138- ; DEFAULT-NEXT: store <16 x i8> [[TMP9 ]], ptr [[TMP10 ]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
139- ; DEFAULT-NEXT: store <16 x i8> [[TMP9 ]], ptr [[TMP13 ]], align 1, !alias.scope [[META9]], !noalias [[META6]]
131+ ; DEFAULT-NEXT: [[TMP1 :%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
132+ ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2 :%.*]] = insertelement <16 x i64> poison, i64 [[TMP1 ]], i64 0
133+ ; DEFAULT-NEXT: [[BROADCAST_SPLAT3 :%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2 ]], <16 x i64> poison, <16 x i32> zeroinitializer
134+ ; DEFAULT-NEXT: [[TMP2 :%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3 ]] to <16 x i8>
135+ ; DEFAULT-NEXT: [[TMP3 :%.*]] = and <16 x i8> [[TMP2 ]], [[TMP0 ]]
136+ ; DEFAULT-NEXT: [[TMP4 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
137+ ; DEFAULT-NEXT: [[TMP5 :%.*]] = getelementptr i8, ptr [[TMP4 ]], i32 16
138+ ; DEFAULT-NEXT: store <16 x i8> [[TMP3 ]], ptr [[TMP4 ]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
139+ ; DEFAULT-NEXT: store <16 x i8> [[TMP3 ]], ptr [[TMP5 ]], align 1, !alias.scope [[META9]], !noalias [[META6]]
140140; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
141- ; DEFAULT-NEXT: [[TMP11 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
142- ; DEFAULT-NEXT: br i1 [[TMP11 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
141+ ; DEFAULT-NEXT: [[TMP6 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
142+ ; DEFAULT-NEXT: br i1 [[TMP6 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
143143; DEFAULT: middle.block:
144144; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
145145; DEFAULT: vec.epilog.iter.check:
@@ -148,20 +148,20 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
148148; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
149149; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
150150; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer
151- ; DEFAULT-NEXT: [[TMP15 :%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
151+ ; DEFAULT-NEXT: [[TMP7 :%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
152152; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
153153; DEFAULT: vec.epilog.vector.body:
154- ; DEFAULT-NEXT: [[INDEX5 :%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
155- ; DEFAULT-NEXT: [[TMP16 :%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
156- ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP16 ]], i64 0
154+ ; DEFAULT-NEXT: [[INDEX6 :%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
155+ ; DEFAULT-NEXT: [[TMP8 :%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
156+ ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8 ]], i64 0
157157; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer
158- ; DEFAULT-NEXT: [[TMP18 :%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
159- ; DEFAULT-NEXT: [[TMP14 :%.*]] = and <8 x i8> [[TMP18 ]], [[TMP15 ]]
160- ; DEFAULT-NEXT: [[TMP26 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5 ]]
161- ; DEFAULT-NEXT: store <8 x i8> [[TMP14 ]], ptr [[TMP26 ]], align 1, !alias.scope [[META9]], !noalias [[META6]]
162- ; DEFAULT-NEXT: [[INDEX_NEXT8 ]] = add nuw i64 [[INDEX5 ]], 8
163- ; DEFAULT-NEXT: [[TMP17 :%.*]] = icmp eq i64 [[INDEX_NEXT8 ]], 1000
164- ; DEFAULT-NEXT: br i1 [[TMP17 ]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
158+ ; DEFAULT-NEXT: [[TMP9 :%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
159+ ; DEFAULT-NEXT: [[TMP10 :%.*]] = and <8 x i8> [[TMP9 ]], [[TMP7 ]]
160+ ; DEFAULT-NEXT: [[TMP11 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6 ]]
161+ ; DEFAULT-NEXT: store <8 x i8> [[TMP10 ]], ptr [[TMP11 ]], align 1, !alias.scope [[META9]], !noalias [[META6]]
162+ ; DEFAULT-NEXT: [[INDEX_NEXT9 ]] = add nuw i64 [[INDEX6 ]], 8
163+ ; DEFAULT-NEXT: [[TMP12 :%.*]] = icmp eq i64 [[INDEX_NEXT9 ]], 1000
164+ ; DEFAULT-NEXT: br i1 [[TMP12 ]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
165165; DEFAULT: vec.epilog.middle.block:
166166; DEFAULT-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
167167; DEFAULT: vec.epilog.scalar.ph:
@@ -186,35 +186,35 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
186186; PRED-NEXT: entry:
187187; PRED-NEXT: br label [[VECTOR_MEMCHECK:%.*]]
188188; PRED: vector.memcheck:
189- ; PRED-NEXT: [[SCEVGEP1 :%.*]] = getelementptr i8, ptr [[DST]], i64 1000
190- ; PRED-NEXT: [[SCEVGEP :%.*]] = getelementptr i8, ptr [[SRC]], i64 8
191- ; PRED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP ]]
192- ; PRED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1 ]]
189+ ; PRED-NEXT: [[SCEVGEP :%.*]] = getelementptr i8, ptr [[DST]], i64 1000
190+ ; PRED-NEXT: [[SCEVGEP1 :%.*]] = getelementptr i8, ptr [[SRC]], i64 8
191+ ; PRED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1 ]]
192+ ; PRED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP ]]
193193; PRED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
194194; PRED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
195195; PRED: vector.ph:
196- ; PRED-NEXT: [[TMP10 :%.*]] = call i64 @llvm.vscale.i64()
197- ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10 ]], 2
196+ ; PRED-NEXT: [[TMP0 :%.*]] = call i64 @llvm.vscale.i64()
197+ ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0 ]], 2
198198; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[X]], i64 0
199199; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
200200; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1000)
201- ; PRED-NEXT: [[TMP11 :%.*]] = trunc <vscale x 2 x i16> [[BROADCAST_SPLAT]] to <vscale x 2 x i8>
201+ ; PRED-NEXT: [[TMP2 :%.*]] = trunc <vscale x 2 x i16> [[BROADCAST_SPLAT]] to <vscale x 2 x i8>
202202; PRED-NEXT: br label [[VECTOR_BODY:%.*]]
203203; PRED: vector.body:
204204; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
205205; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
206- ; PRED-NEXT: [[TMP7 :%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META3:![0-9]+]]
207- ; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP7 ]], i64 0
206+ ; PRED-NEXT: [[TMP3 :%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META3:![0-9]+]]
207+ ; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3 ]], i64 0
208208; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
209- ; PRED-NEXT: [[TMP8 :%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT3]] to <vscale x 2 x i8>
210- ; PRED-NEXT: [[TMP9 :%.*]] = and <vscale x 2 x i8> [[TMP8 ]], [[TMP11 ]]
211- ; PRED-NEXT: [[TMP5 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
212- ; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP9 ]], ptr align 1 [[TMP5 ]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]]
209+ ; PRED-NEXT: [[TMP4 :%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT3]] to <vscale x 2 x i8>
210+ ; PRED-NEXT: [[TMP5 :%.*]] = and <vscale x 2 x i8> [[TMP4 ]], [[TMP2 ]]
211+ ; PRED-NEXT: [[TMP6 :%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
212+ ; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP5 ]], ptr align 1 [[TMP6 ]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]]
213213; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
214214; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1000)
215- ; PRED-NEXT: [[TMP12 :%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
216- ; PRED-NEXT: [[TMP13 :%.*]] = xor i1 [[TMP12 ]], true
217- ; PRED-NEXT: br i1 [[TMP13 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
215+ ; PRED-NEXT: [[TMP7 :%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
216+ ; PRED-NEXT: [[TMP8 :%.*]] = xor i1 [[TMP7 ]], true
217+ ; PRED-NEXT: br i1 [[TMP8 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
218218; PRED: middle.block:
219219; PRED-NEXT: br label [[EXIT:%.*]]
220220; PRED: scalar.ph:
0 commit comments