Skip to content

Commit 6028e94

Browse files
committed
[AArch64][GlobalISel] Added correct lowering for pmull64
pmull64 needs its two i64 operands to be placed into floating-point registers so that they can be treated as vectors. Adding the gMIR opcode (G_PMULL) to onlyUsesFP ensures these operands are assigned to FPRs during Register Bank Selection.
1 parent 35db481 commit 6028e94

File tree

3 files changed

+75
-33
lines changed

3 files changed

+75
-33
lines changed

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -482,8 +482,6 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
482482
case Intrinsic::aarch64_neon_sqrdmulh:
483483
case Intrinsic::aarch64_neon_sqadd:
484484
case Intrinsic::aarch64_neon_sqsub:
485-
case Intrinsic::aarch64_neon_pmull:
486-
case Intrinsic::aarch64_neon_pmull64:
487485
case Intrinsic::aarch64_crypto_sha1h:
488486
case Intrinsic::aarch64_crypto_sha1c:
489487
case Intrinsic::aarch64_crypto_sha1p:
@@ -562,6 +560,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
562560
case TargetOpcode::G_FCMP:
563561
case TargetOpcode::G_LROUND:
564562
case TargetOpcode::G_LLROUND:
563+
case AArch64::G_PMULL:
565564
return true;
566565
case TargetOpcode::G_INTRINSIC:
567566
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {

llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll

Lines changed: 34 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2716,27 +2716,47 @@ entry:
27162716
}
27172717

27182718
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
2719-
; CHECK-LABEL: test_vmull_p64:
2720-
; CHECK: // %bb.0: // %entry
2721-
; CHECK-NEXT: fmov d0, x1
2722-
; CHECK-NEXT: fmov d1, x0
2723-
; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
2724-
; CHECK-NEXT: mov x1, v0.d[1]
2725-
; CHECK-NEXT: fmov x0, d0
2726-
; CHECK-NEXT: ret
2719+
; CHECK-SD-LABEL: test_vmull_p64:
2720+
; CHECK-SD: // %bb.0: // %entry
2721+
; CHECK-SD-NEXT: fmov d0, x1
2722+
; CHECK-SD-NEXT: fmov d1, x0
2723+
; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
2724+
; CHECK-SD-NEXT: mov x1, v0.d[1]
2725+
; CHECK-SD-NEXT: fmov x0, d0
2726+
; CHECK-SD-NEXT: ret
2727+
;
2728+
; CHECK-GI-LABEL: test_vmull_p64:
2729+
; CHECK-GI: // %bb.0: // %entry
2730+
; CHECK-GI-NEXT: fmov d0, x0
2731+
; CHECK-GI-NEXT: fmov d1, x1
2732+
; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
2733+
; CHECK-GI-NEXT: mov d1, v0.d[1]
2734+
; CHECK-GI-NEXT: fmov x0, d0
2735+
; CHECK-GI-NEXT: fmov x1, d1
2736+
; CHECK-GI-NEXT: ret
27272737
entry:
27282738
%vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
27292739
%vmull3.i = bitcast <16 x i8> %vmull2.i to i128
27302740
ret i128 %vmull3.i
27312741
}
27322742

27332743
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
2734-
; CHECK-LABEL: test_vmull_high_p64:
2735-
; CHECK: // %bb.0: // %entry
2736-
; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
2737-
; CHECK-NEXT: mov x1, v0.d[1]
2738-
; CHECK-NEXT: fmov x0, d0
2739-
; CHECK-NEXT: ret
2744+
; CHECK-SD-LABEL: test_vmull_high_p64:
2745+
; CHECK-SD: // %bb.0: // %entry
2746+
; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d
2747+
; CHECK-SD-NEXT: mov x1, v0.d[1]
2748+
; CHECK-SD-NEXT: fmov x0, d0
2749+
; CHECK-SD-NEXT: ret
2750+
;
2751+
; CHECK-GI-LABEL: test_vmull_high_p64:
2752+
; CHECK-GI: // %bb.0: // %entry
2753+
; CHECK-GI-NEXT: mov d0, v0.d[1]
2754+
; CHECK-GI-NEXT: mov d1, v1.d[1]
2755+
; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
2756+
; CHECK-GI-NEXT: mov d1, v0.d[1]
2757+
; CHECK-GI-NEXT: fmov x0, d0
2758+
; CHECK-GI-NEXT: fmov x1, d1
2759+
; CHECK-GI-NEXT: ret
27402760
entry:
27412761
%0 = extractelement <2 x i64> %a, i32 1
27422762
%1 = extractelement <2 x i64> %b, i32 1

llvm/test/CodeGen/AArch64/arm64-vmul.ll

Lines changed: 40 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -3221,35 +3221,58 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
32213221
}
32223222

32233223
define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
3224-
; CHECK-LABEL: test_pmull_64:
3225-
; CHECK: // %bb.0:
3226-
; CHECK-NEXT: fmov d0, x1
3227-
; CHECK-NEXT: fmov d1, x0
3228-
; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
3229-
; CHECK-NEXT: ret
3224+
; CHECK-SD-LABEL: test_pmull_64:
3225+
; CHECK-SD: // %bb.0:
3226+
; CHECK-SD-NEXT: fmov d0, x1
3227+
; CHECK-SD-NEXT: fmov d1, x0
3228+
; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
3229+
; CHECK-SD-NEXT: ret
3230+
;
3231+
; CHECK-GI-LABEL: test_pmull_64:
3232+
; CHECK-GI: // %bb.0:
3233+
; CHECK-GI-NEXT: fmov d0, x0
3234+
; CHECK-GI-NEXT: fmov d1, x1
3235+
; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
3236+
; CHECK-GI-NEXT: ret
32303237
%val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
32313238
ret <16 x i8> %val
32323239
}
32333240

32343241
define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
3235-
; CHECK-LABEL: test_pmull_high_64:
3236-
; CHECK: // %bb.0:
3237-
; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
3238-
; CHECK-NEXT: ret
3242+
; CHECK-SD-LABEL: test_pmull_high_64:
3243+
; CHECK-SD: // %bb.0:
3244+
; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d
3245+
; CHECK-SD-NEXT: ret
3246+
;
3247+
; CHECK-GI-LABEL: test_pmull_high_64:
3248+
; CHECK-GI: // %bb.0:
3249+
; CHECK-GI-NEXT: mov d0, v0.d[1]
3250+
; CHECK-GI-NEXT: mov d1, v1.d[1]
3251+
; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
3252+
; CHECK-GI-NEXT: ret
32393253
%l_hi = extractelement <2 x i64> %l, i32 1
32403254
%r_hi = extractelement <2 x i64> %r, i32 1
32413255
%val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi)
32423256
ret <16 x i8> %val
32433257
}
32443258

32453259
define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind {
3246-
; CHECK-LABEL: test_commutable_pmull_64:
3247-
; CHECK: // %bb.0:
3248-
; CHECK-NEXT: fmov d0, x1
3249-
; CHECK-NEXT: fmov d1, x0
3250-
; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
3251-
; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
3252-
; CHECK-NEXT: ret
3260+
; CHECK-SD-LABEL: test_commutable_pmull_64:
3261+
; CHECK-SD: // %bb.0:
3262+
; CHECK-SD-NEXT: fmov d0, x1
3263+
; CHECK-SD-NEXT: fmov d1, x0
3264+
; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
3265+
; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
3266+
; CHECK-SD-NEXT: ret
3267+
;
3268+
; CHECK-GI-LABEL: test_commutable_pmull_64:
3269+
; CHECK-GI: // %bb.0:
3270+
; CHECK-GI-NEXT: fmov d0, x0
3271+
; CHECK-GI-NEXT: fmov d1, x1
3272+
; CHECK-GI-NEXT: pmull v2.1q, v0.1d, v1.1d
3273+
; CHECK-GI-NEXT: pmull v0.1q, v1.1d, v0.1d
3274+
; CHECK-GI-NEXT: add v0.16b, v2.16b, v0.16b
3275+
; CHECK-GI-NEXT: ret
32533276
%1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
32543277
%2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l)
32553278
%3 = add <16 x i8> %1, %2

0 commit comments

Comments
 (0)