@@ -1365,7 +1365,72 @@ for.end12: ; preds = %vector.body
1365
1365
ret void
1366
1366
}
1367
1367
1368
- declare i16 @llvm.vector.reduce.add.v8i16 (<8 x i16 >)
1368
; Test input: every <4 x i32> result below is formed from two
; @llvm.aarch64.neon.sqdmull.v4i32 products combined with
; @llvm.aarch64.neon.sqadd.v4i32. The CHECK-SD and CHECK-GI lines both expect
; sqdmull followed by the accumulating sqdmlal / sqdmlal2 forms and a final uzp2.
; NOTE(review): the name suggests a complex multiply of %a by a packed
; (re, im) scale held in %scale.coerce - inferred from the name only; confirm.
+ define noundef <8 x i16 > @cmplx_mul_combined_re_im (<8 x i16 > noundef %a , i64 %scale.coerce ) {
1369
+ ; CHECK-SD-LABEL: cmplx_mul_combined_re_im:
1370
+ ; CHECK-SD: // %bb.0: // %entry
1371
+ ; CHECK-SD-NEXT: lsr x9, x0, #16
1372
+ ; CHECK-SD-NEXT: adrp x8, .LCPI14_0
1373
+ ; CHECK-SD-NEXT: dup v4.8h, w0
1374
+ ; CHECK-SD-NEXT: dup v1.8h, w9
1375
+ ; CHECK-SD-NEXT: fmov s3, w9
1376
+ ; CHECK-SD-NEXT: sqneg v2.8h, v1.8h
1377
+ ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
1378
+ ; CHECK-SD-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b
1379
+ ; CHECK-SD-NEXT: rev32 v2.8h, v0.8h
1380
+ ; CHECK-SD-NEXT: sqdmull v3.4s, v0.4h, v4.4h
1381
+ ; CHECK-SD-NEXT: sqdmull2 v0.4s, v0.8h, v4.8h
1382
+ ; CHECK-SD-NEXT: sqdmlal v3.4s, v2.4h, v1.4h
1383
+ ; CHECK-SD-NEXT: sqdmlal2 v0.4s, v2.8h, v1.8h
1384
+ ; CHECK-SD-NEXT: uzp2 v0.8h, v3.8h, v0.8h
1385
+ ; CHECK-SD-NEXT: ret
1386
+ ;
1387
+ ; CHECK-GI-LABEL: cmplx_mul_combined_re_im:
1388
+ ; CHECK-GI: // %bb.0: // %entry
1389
+ ; CHECK-GI-NEXT: lsr w9, w0, #16
1390
+ ; CHECK-GI-NEXT: adrp x8, .LCPI14_0
1391
+ ; CHECK-GI-NEXT: rev32 v4.8h, v0.8h
1392
+ ; CHECK-GI-NEXT: dup v1.8h, w9
1393
+ ; CHECK-GI-NEXT: fmov s3, w9
1394
+ ; CHECK-GI-NEXT: sqneg v2.8h, v1.8h
1395
+ ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
1396
+ ; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b
1397
+ ; CHECK-GI-NEXT: mov d2, v0.d[1]
1398
+ ; CHECK-GI-NEXT: dup v3.8h, w0
1399
+ ; CHECK-GI-NEXT: sqdmull v2.4s, v2.4h, v3.4h
1400
+ ; CHECK-GI-NEXT: sqdmull v5.4s, v4.4h, v1.4h
1401
+ ; CHECK-GI-NEXT: sqdmlal v5.4s, v0.4h, v3.4h
1402
+ ; CHECK-GI-NEXT: sqdmlal2 v2.4s, v4.8h, v1.8h
1403
+ ; CHECK-GI-NEXT: uzp2 v0.8h, v5.8h, v2.8h
1404
+ ; CHECK-GI-NEXT: ret
1405
+ entry:
1406
; Split %scale.coerce into two i16 values: bits 0-15 and bits 16-31.
+ %scale.sroa.0.0.extract.trunc = trunc i64 %scale.coerce to i16
1407
+ %scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce , 16
1408
+ %scale.sroa.2.0.extract.trunc = trunc i64 %scale.sroa.2.0.extract.shift23 to i16
1409
; %a with the two lanes of every adjacent pair exchanged (mask 1,0,3,2,...).
+ %shuffle.i = shufflevector <8 x i16 > %a , <8 x i16 > poison, <8 x i32 > <i32 1 , i32 0 , i32 3 , i32 2 , i32 5 , i32 4 , i32 7 , i32 6 >
1410
+ %vecinit.i24 = insertelement <8 x i16 > poison, i16 %scale.sroa.0.0.extract.trunc , i64 0
1411
+ %vecinit.i = insertelement <8 x i16 > poison, i16 %scale.sroa.2.0.extract.trunc , i64 0
1412
; Splat the bits 16-31 value across all lanes, then saturating-negate the splat.
+ %vecinit7.i = shufflevector <8 x i16 > %vecinit.i , <8 x i16 > poison, <8 x i32 > zeroinitializer
1413
+ %vqnegq_v1.i = tail call noundef <8 x i16 > @llvm.aarch64.neon.sqneg.v8i16 (<8 x i16 > %vecinit7.i )
1414
; Even lanes come from the negated splat; odd lanes all use index 8, i.e.
; lane 0 of %vecinit.i (the un-negated bits 16-31 value).
+ %vbsl5.i = shufflevector <8 x i16 > %vqnegq_v1.i , <8 x i16 > %vecinit.i , <8 x i32 > <i32 0 , i32 8 , i32 2 , i32 8 , i32 4 , i32 8 , i32 6 , i32 8 >
1415
; First products: low and high halves of %a, each multiplied by a splat of
; the bits 0-15 value (%shuffle.i39).
+ %shuffle.i40 = shufflevector <8 x i16 > %a , <8 x i16 > poison, <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1416
+ %shuffle.i39 = shufflevector <8 x i16 > %vecinit.i24 , <8 x i16 > poison, <4 x i32 > zeroinitializer
1417
+ %vqdmull_v2.i36 = tail call noundef <4 x i32 > @llvm.aarch64.neon.sqdmull.v4i32 (<4 x i16 > %shuffle.i40 , <4 x i16 > %shuffle.i39 )
1418
+ %shuffle.i44 = shufflevector <8 x i16 > %a , <8 x i16 > poison, <4 x i32 > <i32 4 , i32 5 , i32 6 , i32 7 >
1419
+ %vqdmull_v2.i = tail call noundef <4 x i32 > @llvm.aarch64.neon.sqdmull.v4i32 (<4 x i16 > %shuffle.i44 , <4 x i16 > %shuffle.i39 )
1420
; Second products: halves of the pair-swapped %a times the matching halves of
; %vbsl5.i, each added to the corresponding first product with sqadd.
+ %shuffle.i38 = shufflevector <8 x i16 > %shuffle.i , <8 x i16 > poison, <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1421
+ %shuffle.i37 = shufflevector <8 x i16 > %vbsl5.i , <8 x i16 > poison, <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1422
+ %vqdmlal2.i45 = tail call <4 x i32 > @llvm.aarch64.neon.sqdmull.v4i32 (<4 x i16 > %shuffle.i38 , <4 x i16 > %shuffle.i37 )
1423
+ %vqdmlal_v3.i46 = tail call noundef <4 x i32 > @llvm.aarch64.neon.sqadd.v4i32 (<4 x i32 > %vqdmull_v2.i36 , <4 x i32 > %vqdmlal2.i45 )
1424
+ %shuffle.i42 = shufflevector <8 x i16 > %shuffle.i , <8 x i16 > poison, <4 x i32 > <i32 4 , i32 5 , i32 6 , i32 7 >
1425
+ %shuffle.i41 = shufflevector <8 x i16 > %vbsl5.i , <8 x i16 > poison, <4 x i32 > <i32 4 , i32 5 , i32 6 , i32 7 >
1426
+ %vqdmlal2.i = tail call <4 x i32 > @llvm.aarch64.neon.sqdmull.v4i32 (<4 x i16 > %shuffle.i42 , <4 x i16 > %shuffle.i41 )
1427
+ %vqdmlal_v3.i = tail call noundef <4 x i32 > @llvm.aarch64.neon.sqadd.v4i32 (<4 x i32 > %vqdmull_v2.i , <4 x i32 > %vqdmlal2.i )
1428
; Reinterpret both i32 sums as i16 vectors and keep only the odd-numbered i16
; elements of their concatenation (mask 1,3,...,15); the CHECK lines show uzp2.
+ %0 = bitcast <4 x i32 > %vqdmlal_v3.i46 to <8 x i16 >
1429
+ %1 = bitcast <4 x i32 > %vqdmlal_v3.i to <8 x i16 >
1430
+ %shuffle.i35 = shufflevector <8 x i16 > %0 , <8 x i16 > %1 , <8 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 , i32 9 , i32 11 , i32 13 , i32 15 >
1431
+ ret <8 x i16 > %shuffle.i35
1432
+ }
1433
+
1369
1434
1370
1435
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1371
1436
; CHECK: {{.*}}
0 commit comments