Skip to content

Commit 020b822

Browse files
committed
additional test cases with prefix undefs
1 parent 7a1516a commit 020b822

File tree

1 file changed

+113
-1
lines changed

1 file changed

+113
-1
lines changed

llvm/test/CodeGen/AArch64/arm64-trn.ll

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ define <4 x float> @vtrnQf(ptr %A, ptr %B) nounwind {
246246
ret <4 x float> %tmp5
247247
}
248248

249-
; Undef shuffle indices should not prevent matching to VTRN:
249+
; Undef shuffle indices (even at the start of the shuffle mask) should not prevent matching to VTRN:
250250

251251
define <8 x i8> @vtrni8_undef(ptr %A, ptr %B) nounwind {
252252
; CHECKLE-LABEL: vtrni8_undef:
@@ -302,3 +302,115 @@ define <8 x i16> @vtrnQi16_undef(ptr %A, ptr %B) nounwind {
302302
%tmp5 = add <8 x i16> %tmp3, %tmp4
303303
ret <8 x i16> %tmp5
304304
}
305+
306+
; <8 x i16> transpose test in which mask lanes 0 and 1 are undef in both
; shuffles (further lanes are undef as well). The defined lanes of %tmp3 agree
; with the mask <0,8,2,10,4,12,6,14> and those of %tmp4 with
; <1,9,3,11,5,13,7,15>; the expected output below is one trn1 and one trn2.
; The big-endian expectations additionally contain rev64 and ext before ret.
define <8 x i16> @vtrnQi16_undef_01(ptr %A, ptr %B) nounwind {
307+
; CHECKLE-LABEL: vtrnQi16_undef_01:
308+
; CHECKLE: // %bb.0:
309+
; CHECKLE-NEXT: ldr q0, [x0]
310+
; CHECKLE-NEXT: ldr q1, [x1]
311+
; CHECKLE-NEXT: trn1 v2.8h, v0.8h, v1.8h
312+
; CHECKLE-NEXT: trn2 v0.8h, v0.8h, v1.8h
313+
; CHECKLE-NEXT: add v0.8h, v2.8h, v0.8h
314+
; CHECKLE-NEXT: ret
315+
;
316+
; CHECKBE-LABEL: vtrnQi16_undef_01:
317+
; CHECKBE: // %bb.0:
318+
; CHECKBE-NEXT: ld1 { v0.8h }, [x0]
319+
; CHECKBE-NEXT: ld1 { v1.8h }, [x1]
320+
; CHECKBE-NEXT: trn1 v2.8h, v0.8h, v1.8h
321+
; CHECKBE-NEXT: trn2 v0.8h, v0.8h, v1.8h
322+
; CHECKBE-NEXT: add v0.8h, v2.8h, v0.8h
323+
; CHECKBE-NEXT: rev64 v0.8h, v0.8h
324+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
325+
; CHECKBE-NEXT: ret
326+
%tmp1 = load <8 x i16>, ptr %A
327+
%tmp2 = load <8 x i16>, ptr %B
328+
; Even-lane pattern; lanes 0, 1 and 3 are undef.
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 2, i32 undef, i32 4, i32 12, i32 6, i32 14>
329+
; Odd-lane pattern; lanes 0, 1, 4, 6 and 7 are undef.
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 3, i32 11, i32 undef, i32 13, i32 undef, i32 undef>
330+
; The add uses both shuffle results, so both appear in the expected output.
%tmp5 = add <8 x i16> %tmp3, %tmp4
331+
ret <8 x i16> %tmp5
332+
}
333+
334+
; <8 x i16> transpose test in which only mask lane 0 of the leading lanes is
; undef in both shuffles (lane 1 is defined; some later lanes are undef too).
; The defined lanes of %tmp3 agree with the mask <0,8,2,10,4,12,6,14> and those
; of %tmp4 with <1,9,3,11,5,13,7,15>; the expected output below is one trn1 and
; one trn2. The big-endian expectations additionally contain rev64 and ext
; before ret.
define <8 x i16> @vtrnQi16_undef_0(ptr %A, ptr %B) nounwind {
335+
; CHECKLE-LABEL: vtrnQi16_undef_0:
336+
; CHECKLE: // %bb.0:
337+
; CHECKLE-NEXT: ldr q0, [x0]
338+
; CHECKLE-NEXT: ldr q1, [x1]
339+
; CHECKLE-NEXT: trn1 v2.8h, v0.8h, v1.8h
340+
; CHECKLE-NEXT: trn2 v0.8h, v0.8h, v1.8h
341+
; CHECKLE-NEXT: add v0.8h, v2.8h, v0.8h
342+
; CHECKLE-NEXT: ret
343+
;
344+
; CHECKBE-LABEL: vtrnQi16_undef_0:
345+
; CHECKBE: // %bb.0:
346+
; CHECKBE-NEXT: ld1 { v0.8h }, [x0]
347+
; CHECKBE-NEXT: ld1 { v1.8h }, [x1]
348+
; CHECKBE-NEXT: trn1 v2.8h, v0.8h, v1.8h
349+
; CHECKBE-NEXT: trn2 v0.8h, v0.8h, v1.8h
350+
; CHECKBE-NEXT: add v0.8h, v2.8h, v0.8h
351+
; CHECKBE-NEXT: rev64 v0.8h, v0.8h
352+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
353+
; CHECKBE-NEXT: ret
354+
%tmp1 = load <8 x i16>, ptr %A
355+
%tmp2 = load <8 x i16>, ptr %B
356+
; Even-lane pattern; lanes 0, 2 and 3 are undef.
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 undef, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
357+
; Odd-lane pattern; lanes 0, 6 and 7 are undef.
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 undef, i32 9, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
358+
; The add uses both shuffle results, so both appear in the expected output.
%tmp5 = add <8 x i16> %tmp3, %tmp4
359+
ret <8 x i16> %tmp5
360+
}
361+
362+
; <4 x i32> transpose test in which mask lane 1 is undef in both shuffles while
; lane 0 is defined. The defined lanes of %tmp3 agree with the mask <0,4,2,6>
; and those of %tmp4 with <1,5,3,7>; the expected output below is one trn1 and
; one trn2. The big-endian expectations additionally contain rev64 and ext
; before ret.
define <4 x i32> @vtrnQi32_undef_1(ptr %A, ptr %B) nounwind {
363+
; CHECKLE-LABEL: vtrnQi32_undef_1:
364+
; CHECKLE: // %bb.0:
365+
; CHECKLE-NEXT: ldr q0, [x0]
366+
; CHECKLE-NEXT: ldr q1, [x1]
367+
; CHECKLE-NEXT: trn1 v2.4s, v0.4s, v1.4s
368+
; CHECKLE-NEXT: trn2 v0.4s, v0.4s, v1.4s
369+
; CHECKLE-NEXT: add v0.4s, v2.4s, v0.4s
370+
; CHECKLE-NEXT: ret
371+
;
372+
; CHECKBE-LABEL: vtrnQi32_undef_1:
373+
; CHECKBE: // %bb.0:
374+
; CHECKBE-NEXT: ld1 { v0.4s }, [x0]
375+
; CHECKBE-NEXT: ld1 { v1.4s }, [x1]
376+
; CHECKBE-NEXT: trn1 v2.4s, v0.4s, v1.4s
377+
; CHECKBE-NEXT: trn2 v0.4s, v0.4s, v1.4s
378+
; CHECKBE-NEXT: add v0.4s, v2.4s, v0.4s
379+
; CHECKBE-NEXT: rev64 v0.4s, v0.4s
380+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
381+
; CHECKBE-NEXT: ret
382+
%tmp1 = load <4 x i32>, ptr %A
383+
%tmp2 = load <4 x i32>, ptr %B
384+
; Even-lane pattern; only lane 1 is undef.
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 undef, i32 2, i32 6>
385+
; Odd-lane pattern; only lane 1 is undef.
%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 undef, i32 3, i32 7>
386+
; The add uses both shuffle results, so both appear in the expected output.
%tmp5 = add <4 x i32> %tmp3, %tmp4
387+
ret <4 x i32> %tmp5
388+
}
389+
390+
; <16 x i8> transpose test in which mask lanes 0, 1 and 2 are undef in both
; shuffles (further lanes are undef as well). The defined lanes of %tmp3 agree
; with the mask <0,16,2,18,...,14,30> and those of %tmp4 with
; <1,17,3,19,...,15,31>; the expected output below is one trn1 and one trn2.
; The big-endian expectations additionally contain rev64 and ext before ret.
define <16 x i8> @vtrnQi8_undef_012(ptr %A, ptr %B) nounwind {
391+
; CHECKLE-LABEL: vtrnQi8_undef_012:
392+
; CHECKLE: // %bb.0:
393+
; CHECKLE-NEXT: ldr q0, [x0]
394+
; CHECKLE-NEXT: ldr q1, [x1]
395+
; CHECKLE-NEXT: trn1 v2.16b, v0.16b, v1.16b
396+
; CHECKLE-NEXT: trn2 v0.16b, v0.16b, v1.16b
397+
; CHECKLE-NEXT: add v0.16b, v2.16b, v0.16b
398+
; CHECKLE-NEXT: ret
399+
;
400+
; CHECKBE-LABEL: vtrnQi8_undef_012:
401+
; CHECKBE: // %bb.0:
402+
; CHECKBE-NEXT: ld1 { v0.16b }, [x0]
403+
; CHECKBE-NEXT: ld1 { v1.16b }, [x1]
404+
; CHECKBE-NEXT: trn1 v2.16b, v0.16b, v1.16b
405+
; CHECKBE-NEXT: trn2 v0.16b, v0.16b, v1.16b
406+
; CHECKBE-NEXT: add v0.16b, v2.16b, v0.16b
407+
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
408+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
409+
; CHECKBE-NEXT: ret
410+
%tmp1 = load <16 x i8>, ptr %A
411+
%tmp2 = load <16 x i8>, ptr %B
412+
; Even-lane pattern; lanes 0, 1, 2, 5 and 8 are undef.
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 18, i32 4, i32 undef, i32 6, i32 22, i32 undef, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
413+
; Odd-lane pattern; lanes 0, 1, 2, 7 and 12 are undef.
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 19, i32 5, i32 21, i32 7, i32 undef, i32 9, i32 25, i32 11, i32 27, i32 undef, i32 29, i32 15, i32 31>
414+
; The add uses both shuffle results, so both appear in the expected output.
%tmp5 = add <16 x i8> %tmp3, %tmp4
415+
ret <16 x i8> %tmp5
416+
}

0 commit comments

Comments
 (0)