Skip to content

Commit 3ca527d

Browse files
committed
[AArch64] Add a test case showing both dup and scalar_to_reg in the same function. NFC
1 parent 0aba5bf commit 3ca527d

File tree

1 file changed

+66
-1
lines changed

1 file changed

+66
-1
lines changed

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1365,7 +1365,72 @@ for.end12: ; preds = %vector.body
13651365
ret void
13661366
}
13671367

1368-
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
1368+
; cmplx_mul_combined_re_im: codegen test in which one scalar is used both as a
; full-vector splat and as a single inserted lane within the same function.
; Inputs: %a is an <8 x i16> vector; %scale.coerce is an i64 from which two i16
; values are extracted (bits 0-15 and bits 16-31).
; The body swaps adjacent i16 lanes of %a, builds a vector whose even lanes hold
; the saturating-negated second scalar and whose odd lanes hold the un-negated
; one, forms two sqdmull products for each half of the vector and combines them
; with sqadd, then returns the odd-numbered i16 lanes of the two <4 x i32> sums.
; NOTE(review): the two autogenerated assertion groups below (SD and GI
; prefixes) presumably map to SelectionDAG and GlobalISel RUN lines that are
; outside this view - confirm against the top of the test file.
define noundef <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) {
1369+
; CHECK-SD-LABEL: cmplx_mul_combined_re_im:
1370+
; CHECK-SD: // %bb.0: // %entry
1371+
; CHECK-SD-NEXT: lsr x9, x0, #16
1372+
; CHECK-SD-NEXT: adrp x8, .LCPI14_0
1373+
; CHECK-SD-NEXT: dup v4.8h, w0
1374+
; CHECK-SD-NEXT: dup v1.8h, w9
1375+
; CHECK-SD-NEXT: fmov s3, w9
1376+
; CHECK-SD-NEXT: sqneg v2.8h, v1.8h
1377+
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
1378+
; CHECK-SD-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b
1379+
; CHECK-SD-NEXT: rev32 v2.8h, v0.8h
1380+
; CHECK-SD-NEXT: sqdmull v3.4s, v0.4h, v4.4h
1381+
; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v4.8h
1382+
; CHECK-SD-NEXT: sqdmlal v3.4s, v2.4h, v1.4h
1383+
; CHECK-SD-NEXT: sqdmlal2 v0.4s, v2.8h, v1.8h
1384+
; CHECK-SD-NEXT: uzp2 v0.8h, v3.8h, v0.8h
1385+
; CHECK-SD-NEXT: ret
1386+
;
1387+
; CHECK-GI-LABEL: cmplx_mul_combined_re_im:
1388+
; CHECK-GI: // %bb.0: // %entry
1389+
; CHECK-GI-NEXT: lsr w9, w0, #16
1390+
; CHECK-GI-NEXT: adrp x8, .LCPI14_0
1391+
; CHECK-GI-NEXT: rev32 v4.8h, v0.8h
1392+
; CHECK-GI-NEXT: dup v1.8h, w9
1393+
; CHECK-GI-NEXT: fmov s3, w9
1394+
; CHECK-GI-NEXT: sqneg v2.8h, v1.8h
1395+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
1396+
; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b
1397+
; CHECK-GI-NEXT: mov d2, v0.d[1]
1398+
; CHECK-GI-NEXT: dup v3.8h, w0
1399+
; CHECK-GI-NEXT: sqdmull v2.4s, v2.4h, v3.4h
1400+
; CHECK-GI-NEXT: sqdmull v5.4s, v4.4h, v1.4h
1401+
; CHECK-GI-NEXT: sqdmlal v5.4s, v0.4h, v3.4h
1402+
; CHECK-GI-NEXT: sqdmlal2 v2.4s, v4.8h, v1.8h
1403+
; CHECK-GI-NEXT: uzp2 v0.8h, v5.8h, v2.8h
1404+
; CHECK-GI-NEXT: ret
1405+
entry:
1406+
%scale.sroa.0.0.extract.trunc = trunc i64 %scale.coerce to i16 ; bits 0-15 of %scale.coerce
1407+
%scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce, 16
1408+
%scale.sroa.2.0.extract.trunc = trunc i64 %scale.sroa.2.0.extract.shift23 to i16 ; bits 16-31 of %scale.coerce
1409+
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> ; swap each adjacent pair of i16 lanes
1410+
%vecinit.i24 = insertelement <8 x i16> poison, i16 %scale.sroa.0.0.extract.trunc, i64 0
1411+
%vecinit.i = insertelement <8 x i16> poison, i16 %scale.sroa.2.0.extract.trunc, i64 0
1412+
%vecinit7.i = shufflevector <8 x i16> %vecinit.i, <8 x i16> poison, <8 x i32> zeroinitializer ; splat lane 0 to all eight lanes
1413+
%vqnegq_v1.i = tail call noundef <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %vecinit7.i)
1414+
%vbsl5.i = shufflevector <8 x i16> %vqnegq_v1.i, <8 x i16> %vecinit.i, <8 x i32> <i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8> ; even lanes: negated splat; odd lanes: lane 0 of %vecinit.i
1415+
%shuffle.i40 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1416+
%shuffle.i39 = shufflevector <8 x i16> %vecinit.i24, <8 x i16> poison, <4 x i32> zeroinitializer
1417+
%vqdmull_v2.i36 = tail call noundef <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i40, <4 x i16> %shuffle.i39)
1418+
%shuffle.i44 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1419+
%vqdmull_v2.i = tail call noundef <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i44, <4 x i16> %shuffle.i39)
1420+
%shuffle.i38 = shufflevector <8 x i16> %shuffle.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1421+
%shuffle.i37 = shufflevector <8 x i16> %vbsl5.i, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1422+
%vqdmlal2.i45 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i38, <4 x i16> %shuffle.i37)
1423+
%vqdmlal_v3.i46 = tail call noundef <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %vqdmull_v2.i36, <4 x i32> %vqdmlal2.i45)
1424+
%shuffle.i42 = shufflevector <8 x i16> %shuffle.i, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1425+
%shuffle.i41 = shufflevector <8 x i16> %vbsl5.i, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1426+
%vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i42, <4 x i16> %shuffle.i41)
1427+
%vqdmlal_v3.i = tail call noundef <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %vqdmull_v2.i, <4 x i32> %vqdmlal2.i)
1428+
%0 = bitcast <4 x i32> %vqdmlal_v3.i46 to <8 x i16>
1429+
%1 = bitcast <4 x i32> %vqdmlal_v3.i to <8 x i16>
1430+
%shuffle.i35 = shufflevector <8 x i16> %0, <8 x i16> %1, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; keep the odd i16 lanes of both sums
1431+
ret <8 x i16> %shuffle.i35
1432+
}
1433+
13691434

13701435
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
13711436
; CHECK: {{.*}}

0 commit comments

Comments
 (0)