@@ -107,9 +107,7 @@ define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind #0 {
107107; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
108108; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
109109; CHECK-NEXT: call void @llvm.donothing()
110- ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
111- ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
112- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
110+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
113111; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
114112; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
115113; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -123,9 +121,7 @@ define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind #0 {
123121; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
124122; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
125123; CHECK-NEXT: call void @llvm.donothing()
126- ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
127- ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
128- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
124+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
129125; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
130126; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
131127; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -139,8 +135,7 @@ define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind #0 {
139135; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
140136; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
141137; CHECK-NEXT: call void @llvm.donothing()
142- ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
143- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
138+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
144139; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
145140; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
146141; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -155,9 +150,7 @@ define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
155150; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
156151; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
157152; CHECK-NEXT: call void @llvm.donothing()
158- ; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
159- ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
160- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
153+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
161154; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[A]])
162155; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
163156; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -175,9 +168,7 @@ define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
175168; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
176169; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
177170; CHECK-NEXT: call void @llvm.donothing()
178- ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
179- ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
180- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
171+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
181172; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]])
182173; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
183174; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -195,8 +186,7 @@ define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
195186; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
196187; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
197188; CHECK-NEXT: call void @llvm.donothing()
198- ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
199- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
189+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
200190; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[A]])
201191; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
202192; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -217,9 +207,7 @@ define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind #0 {
217207; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
218208; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
219209; CHECK-NEXT: call void @llvm.donothing()
220- ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
221- ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
222- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
210+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
223211; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
224212; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
225213; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -233,9 +221,7 @@ define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind #0 {
233221; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
234222; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
235223; CHECK-NEXT: call void @llvm.donothing()
236- ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
237- ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
238- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
224+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
239225; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
240226; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
241227; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -249,8 +235,7 @@ define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind #0 {
249235; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
250236; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
251237; CHECK-NEXT: call void @llvm.donothing()
252- ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
253- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
238+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
254239; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
255240; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
256241; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -265,9 +250,7 @@ define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
265250; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
266251; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
267252; CHECK-NEXT: call void @llvm.donothing()
268- ; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
269- ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
270- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
253+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
271254; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[A]])
272255; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
273256; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -285,9 +268,7 @@ define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
285268; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
286269; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
287270; CHECK-NEXT: call void @llvm.donothing()
288- ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
289- ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
290- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
271+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
291272; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]])
292273; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
293274; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -305,8 +286,7 @@ define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
305286; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
306287; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
307288; CHECK-NEXT: call void @llvm.donothing()
308- ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
309- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
289+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
310290; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
311291; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
312292; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -327,9 +307,7 @@ define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind #0 {
327307; CHECK-SAME: <8 x i16> [[A:%.*]]) #[[ATTR0]] {
328308; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
329309; CHECK-NEXT: call void @llvm.donothing()
330- ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
331- ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
332- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <8 x i8>
310+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
333311; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
334312; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
335313; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -343,9 +321,7 @@ define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind #0 {
343321; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
344322; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
345323; CHECK-NEXT: call void @llvm.donothing()
346- ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
347- ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
348- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <4 x i16>
324+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
349325; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
350326; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
351327; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -359,8 +335,7 @@ define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind #0 {
359335; CHECK-SAME: <2 x i64> [[A:%.*]]) #[[ATTR0]] {
360336; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
361337; CHECK-NEXT: call void @llvm.donothing()
362- ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
363- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
338+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
364339; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
365340; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
366341; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -375,9 +350,7 @@ define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind #0 {
375350; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
376351; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
377352; CHECK-NEXT: call void @llvm.donothing()
378- ; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]])
379- ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP6]] to i64
380- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <8 x i8>
353+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
381354; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[A]])
382355; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
383356; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[RET]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -395,9 +368,7 @@ define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind #0 {
395368; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
396369; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
397370; CHECK-NEXT: call void @llvm.donothing()
398- ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
399- ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP6]] to i64
400- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP4]] to <4 x i16>
371+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
401372; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]])
402373; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
403374; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[RET]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -415,8 +386,7 @@ define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind #0 {
415386; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
416387; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
417388; CHECK-NEXT: call void @llvm.donothing()
418- ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
419- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP5]] to <2 x i32>
389+ ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
420390; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[A]])
421391; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
422392; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[RET]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
0 commit comments