@@ -246,85 +246,61 @@ define <4 x float> @vtrnQf(ptr %A, ptr %B) nounwind {
246246 ret <4 x float > %tmp5
247247}
248248
; Diff hunk: test vtrni8_8first is replaced by vtrni8_trn1_flipped.
; The new version takes both <8 x i8> vectors by value instead of loading them
; through pointers, and keeps only the shuffle with mask 8,0,10,2,12,4,14,6,
; i.e. the even lanes of %B and %A interleaved with %B first. The removed
; version also computed a second shuffle and added the two results.
; Expected code is a single trn1 whose source operands are swapped (v1, v0).
; The CHECKBE expectations (presumably the big-endian run; RUN lines are not
; visible here) additionally have rev64 on both inputs and on the result.
249- define <8 x i8 > @vtrni8_8first ( ptr %A , ptr %B ) nounwind {
250- ; CHECKLE-LABEL: vtrni8_8first :
249+ define <8 x i8 > @vtrni8_trn1_flipped (< 8 x i8 > %A , < 8 x i8 > %B ) nounwind {
250+ ; CHECKLE-LABEL: vtrni8_trn1_flipped :
251251; CHECKLE: // %bb.0:
252- ; CHECKLE-NEXT: ldr d0, [x0]
253- ; CHECKLE-NEXT: ldr d1, [x1]
254- ; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b
255- ; CHECKLE-NEXT: trn2 v0.8b, v0.8b, v1.8b
256- ; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
252+ ; CHECKLE-NEXT: trn1 v0.8b, v1.8b, v0.8b
257253; CHECKLE-NEXT: ret
258254;
259- ; CHECKBE-LABEL: vtrni8_8first :
255+ ; CHECKBE-LABEL: vtrni8_trn1_flipped :
260256; CHECKBE: // %bb.0:
261- ; CHECKBE-NEXT: ld1 { v0.8b }, [x0]
262- ; CHECKBE-NEXT: ld1 { v1.8b }, [x1]
263- ; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b
264- ; CHECKBE-NEXT: trn2 v0.8b, v0.8b, v1.8b
265- ; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
257+ ; CHECKBE-NEXT: rev64 v0.8b, v0.8b
258+ ; CHECKBE-NEXT: rev64 v1.8b, v1.8b
259+ ; CHECKBE-NEXT: trn1 v0.8b, v1.8b, v0.8b
266260; CHECKBE-NEXT: rev64 v0.8b, v0.8b
267261; CHECKBE-NEXT: ret
268- %tmp1 = load <8 x i8 >, ptr %A
269- %tmp2 = load <8 x i8 >, ptr %B
270- %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 8 , i32 0 , i32 10 , i32 2 , i32 12 , i32 4 , i32 14 , i32 6 >
271- %tmp4 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 1 , i32 9 , i32 3 , i32 11 , i32 5 , i32 13 , i32 7 , i32 15 >
272- %tmp5 = add <8 x i8 > %tmp3 , %tmp4
273- ret <8 x i8 > %tmp5
262+ %tmp1 = shufflevector <8 x i8 > %A , <8 x i8 > %B , <8 x i32 > <i32 8 , i32 0 , i32 10 , i32 2 , i32 12 , i32 4 , i32 14 , i32 6 >
263+ ret <8 x i8 > %tmp1
274264}
275265
; Diff hunk: test vtrni8_9first is replaced by vtrni8_trn2_flipped.
; The new version takes both <8 x i8> vectors by value instead of loading them
; through pointers, and keeps only the shuffle with mask 9,1,11,3,13,5,15,7,
; i.e. the odd lanes of %B and %A interleaved with %B first. The removed
; version also computed the 8,0,10,2,... shuffle and added the two results.
; Expected code is a single trn2 whose source operands are swapped (v1, v0).
; The CHECKBE expectations (presumably the big-endian run; RUN lines are not
; visible here) additionally have rev64 on both inputs and on the result.
276- define <8 x i8 > @vtrni8_9first ( ptr %A , ptr %B ) nounwind {
277- ; CHECKLE-LABEL: vtrni8_9first :
266+ define <8 x i8 > @vtrni8_trn2_flipped (< 8 x i8 > %A , < 8 x i8 > %B ) nounwind {
267+ ; CHECKLE-LABEL: vtrni8_trn2_flipped :
278268; CHECKLE: // %bb.0:
279- ; CHECKLE-NEXT: ldr d0, [x0]
280- ; CHECKLE-NEXT: ldr d1, [x1]
281- ; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b
282269; CHECKLE-NEXT: trn2 v0.8b, v1.8b, v0.8b
283- ; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
284270; CHECKLE-NEXT: ret
285271;
286- ; CHECKBE-LABEL: vtrni8_9first :
272+ ; CHECKBE-LABEL: vtrni8_trn2_flipped :
287273; CHECKBE: // %bb.0:
288- ; CHECKBE-NEXT: ld1 { v0.8b }, [x0]
289- ; CHECKBE-NEXT: ld1 { v1.8b }, [x1]
290- ; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b
274+ ; CHECKBE-NEXT: rev64 v0.8b, v0.8b
275+ ; CHECKBE-NEXT: rev64 v1.8b, v1.8b
291276; CHECKBE-NEXT: trn2 v0.8b, v1.8b, v0.8b
292- ; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
293277; CHECKBE-NEXT: rev64 v0.8b, v0.8b
294278; CHECKBE-NEXT: ret
295- %tmp1 = load <8 x i8 >, ptr %A
296- %tmp2 = load <8 x i8 >, ptr %B
297- %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 8 , i32 0 , i32 10 , i32 2 , i32 12 , i32 4 , i32 14 , i32 6 >
298- %tmp4 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 9 , i32 1 , i32 11 , i32 3 , i32 13 , i32 5 , i32 15 , i32 7 >
299- %tmp5 = add <8 x i8 > %tmp3 , %tmp4
300- ret <8 x i8 > %tmp5
279+ %tmp1 = shufflevector <8 x i8 > %A , <8 x i8 > %B , <8 x i32 > <i32 9 , i32 1 , i32 11 , i32 3 , i32 13 , i32 5 , i32 15 , i32 7 >
280+ ret <8 x i8 > %tmp1
301281}
302282
; Diff hunk: test vtrni8_89first_undef is replaced by
; vtrni8_both_flipped_with_poison_values.
; The new version takes both <8 x i8> vectors by value instead of loading them
; through pointers. It still builds two shuffles with %B-first ordering and
; adds them, but lane 0 of each mask is now poison (it was 8 and 9 before), so
; several mask lanes, including the first, are poison in both shuffles.
; Expected code is unchanged apart from the loads going away: trn1 and trn2
; with swapped source operands (v1, v0) followed by add. The CHECKBE
; expectations (presumably the big-endian run; RUN lines are not visible here)
; replace the ld1 loads with rev64 on both inputs and keep the final rev64.
303- define <8 x i8 > @vtrni8_89first_undef ( ptr %A , ptr %B ) nounwind {
304- ; CHECKLE-LABEL: vtrni8_89first_undef :
283+ define <8 x i8 > @vtrni8_both_flipped_with_poison_values (< 8 x i8 > %A , < 8 x i8 > %B ) nounwind {
284+ ; CHECKLE-LABEL: vtrni8_both_flipped_with_poison_values :
305285; CHECKLE: // %bb.0:
306- ; CHECKLE-NEXT: ldr d0, [x0]
307- ; CHECKLE-NEXT: ldr d1, [x1]
308286; CHECKLE-NEXT: trn1 v2.8b, v1.8b, v0.8b
309287; CHECKLE-NEXT: trn2 v0.8b, v1.8b, v0.8b
310288; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
311289; CHECKLE-NEXT: ret
312290;
313- ; CHECKBE-LABEL: vtrni8_89first_undef :
291+ ; CHECKBE-LABEL: vtrni8_both_flipped_with_poison_values :
314292; CHECKBE: // %bb.0:
315- ; CHECKBE-NEXT: ld1 { v0.8b }, [x0]
316- ; CHECKBE-NEXT: ld1 { v1.8b }, [x1]
293+ ; CHECKBE-NEXT: rev64 v0.8b, v0.8b
294+ ; CHECKBE-NEXT: rev64 v1.8b, v1.8b
317295; CHECKBE-NEXT: trn1 v2.8b, v1.8b, v0.8b
318296; CHECKBE-NEXT: trn2 v0.8b, v1.8b, v0.8b
319297; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
320298; CHECKBE-NEXT: rev64 v0.8b, v0.8b
321299; CHECKBE-NEXT: ret
322- %tmp1 = load <8 x i8 >, ptr %A
323- %tmp2 = load <8 x i8 >, ptr %B
324- %tmp3 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 8 , i32 0 , i32 poison, i32 2 , i32 poison, i32 4 , i32 14 , i32 6 >
325- %tmp4 = shufflevector <8 x i8 > %tmp1 , <8 x i8 > %tmp2 , <8 x i32 > <i32 9 , i32 1 , i32 poison, i32 3 , i32 13 , i32 5 , i32 15 , i32 poison>
326- %tmp5 = add <8 x i8 > %tmp3 , %tmp4
327- ret <8 x i8 > %tmp5
300+ %tmp1 = shufflevector <8 x i8 > %A , <8 x i8 > %B , <8 x i32 > <i32 poison, i32 0 , i32 poison, i32 2 , i32 poison, i32 4 , i32 14 , i32 6 >
301+ %tmp2 = shufflevector <8 x i8 > %A , <8 x i8 > %B , <8 x i32 > <i32 poison, i32 1 , i32 poison, i32 3 , i32 13 , i32 5 , i32 15 , i32 poison>
302+ %tmp3 = add <8 x i8 > %tmp1 , %tmp2
303+ ret <8 x i8 > %tmp3
328304}
329305
330306; Undef shuffle indices (even at the start of the shuffle mask) should not prevent matching to VTRN:
0 commit comments