-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AArch64][GlobalISel] Correct instructions for 64bit fneg constant vectors. #166537
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) Changes: This code was assuming that the vectors were 128bit. Add handling for 64bit vectors. Some of the tests do not apply yet due to not matching non-splat vectors. Full diff: https://github.com/llvm/llvm-project/pull/166537.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 14b0f9a564e01..eded9e0283573 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5666,6 +5666,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
MachineRegisterInfo &MRI) {
LLT DstTy = MRI.getType(Dst);
unsigned DstSize = DstTy.getSizeInBits();
+ assert((DstSize == 64 || DstSize == 128) && "Unexpected vector constant size");
+
if (CV->isNullValue()) {
if (DstSize == 128) {
auto Mov =
@@ -5735,17 +5737,24 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
// Try to create the new constants with MOVI, and if so generate a fneg
// for it.
if (auto *NewOp = TryMOVIWithBits(NegBits)) {
- Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+ Register NewDst = MRI.createVirtualRegister(
+ DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
NewOp->getOperand(0).setReg(NewDst);
return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
}
return nullptr;
};
MachineInstr *R;
- if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
- (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
+ if ((R = TryWithFNeg(DefBits, 32,
+ DstSize == 64 ? AArch64::FNEGv2f32
+ : AArch64::FNEGv4f32)) ||
+ (R = TryWithFNeg(DefBits, 64,
+ DstSize == 64 ? AArch64::FNEGDr
+ : AArch64::FNEGv2f64)) ||
(STI.hasFullFP16() &&
- (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
+ (R = TryWithFNeg(DefBits, 16,
+ DstSize == 64 ? AArch64::FNEGv4f16
+ : AArch64::FNEGv8f16))))
return R;
}
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 5be9394f61b30..4f657865e9f05 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -76,6 +76,15 @@ define <2 x i32> @movi2s_lsl16() {
ret <2 x i32> <i32 16711680, i32 16711680>
}
+define <2 x i32> @movi2s_fneg() {
+; CHECK-LABEL: movi2s_fneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2s, #240, lsl #8
+; CHECK-NEXT: fneg v0.2s, v0.2s
+; CHECK-NEXT: ret
+ ret <2 x i32> <i32 2147545088, i32 2147545088>
+}
+
define <2 x i32> @movi2s_lsl24() {
; CHECK-LABEL: movi2s_lsl24:
; CHECK: // %bb.0:
@@ -149,6 +158,33 @@ define <4 x i16> @movi4h_lsl8() {
ret <4 x i16> <i16 65280, i16 65280, i16 65280, i16 65280>
}
+define <4 x i16> @movi4h_fneg() {
+; CHECK-NOFP16-SD-LABEL: movi4h_fneg:
+; CHECK-NOFP16-SD: // %bb.0:
+; CHECK-NOFP16-SD-NEXT: movi v0.4h, #127, lsl #8
+; CHECK-NOFP16-SD-NEXT: fneg v0.2s, v0.2s
+; CHECK-NOFP16-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: movi4h_fneg:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: movi v0.4h, #127, lsl #8
+; CHECK-FP16-SD-NEXT: fneg v0.2s, v0.2s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-NOFP16-GI-LABEL: movi4h_fneg:
+; CHECK-NOFP16-GI: // %bb.0:
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI18_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0]
+; CHECK-NOFP16-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: movi4h_fneg:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI18_0
+; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0]
+; CHECK-FP16-GI-NEXT: ret
+ ret <4 x i16> <i16 32512, i16 65280, i16 32512, i16 65280>
+}
+
define <8 x i16> @movi8h_lsl0() {
; CHECK-LABEL: movi8h_lsl0:
; CHECK: // %bb.0:
@@ -180,14 +216,14 @@ define <8 x i16> @movi8h_fneg() {
;
; CHECK-NOFP16-GI-LABEL: movi8h_fneg:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI19_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI21_0
+; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: movi8h_fneg:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI19_0
-; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI21_0
+; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280>
}
@@ -275,6 +311,27 @@ define <4 x i16> @mvni4h_lsl8() {
ret <4 x i16> <i16 61439, i16 61439, i16 61439, i16 61439>
}
+define <4 x i16> @mvni4h_neg() {
+; CHECK-NOFP16-SD-LABEL: mvni4h_neg:
+; CHECK-NOFP16-SD: // %bb.0:
+; CHECK-NOFP16-SD-NEXT: mov w8, #33008 // =0x80f0
+; CHECK-NOFP16-SD-NEXT: dup v0.4h, w8
+; CHECK-NOFP16-SD-NEXT: ret
+;
+; CHECK-FP16-LABEL: mvni4h_neg:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: movi v0.4h, #240
+; CHECK-FP16-NEXT: fneg v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+;
+; CHECK-NOFP16-GI-LABEL: mvni4h_neg:
+; CHECK-NOFP16-GI: // %bb.0:
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI32_0]
+; CHECK-NOFP16-GI-NEXT: ret
+ ret <4 x i16> <i16 33008, i16 33008, i16 33008, i16 33008>
+}
+
define <8 x i16> @mvni8h_lsl0() {
; CHECK-LABEL: mvni8h_lsl0:
; CHECK: // %bb.0:
@@ -306,8 +363,8 @@ define <8 x i16> @mvni8h_neg() {
;
; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI35_0
+; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI35_0]
; CHECK-NOFP16-GI-NEXT: ret
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
}
@@ -486,6 +543,33 @@ define <2 x double> @fmov2d_neg0() {
ret <2 x double> <double -0.0, double -0.0>
}
+define <1 x double> @fmov1d_neg0() {
+; CHECK-NOFP16-SD-LABEL: fmov1d_neg0:
+; CHECK-NOFP16-SD: // %bb.0:
+; CHECK-NOFP16-SD-NEXT: movi d0, #0000000000000000
+; CHECK-NOFP16-SD-NEXT: fneg d0, d0
+; CHECK-NOFP16-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: fmov1d_neg0:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: movi d0, #0000000000000000
+; CHECK-FP16-SD-NEXT: fneg d0, d0
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-NOFP16-GI-LABEL: fmov1d_neg0:
+; CHECK-NOFP16-GI: // %bb.0:
+; CHECK-NOFP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NOFP16-GI-NEXT: fmov d0, x8
+; CHECK-NOFP16-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: fmov1d_neg0:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-FP16-GI-NEXT: fmov d0, x8
+; CHECK-FP16-GI-NEXT: ret
+ ret <1 x double> <double -0.0>
+}
+
define <2 x i32> @movi1d_1() {
; CHECK-NOFP16-SD-LABEL: movi1d_1:
; CHECK-NOFP16-SD: // %bb.0:
@@ -499,14 +583,14 @@ define <2 x i32> @movi1d_1() {
;
; CHECK-NOFP16-GI-LABEL: movi1d_1:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI52_0
-; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0]
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI56_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0]
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: movi1d_1:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI52_0
-; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0]
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI56_0
+; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0]
; CHECK-FP16-GI-NEXT: ret
ret <2 x i32> <i32 -65536, i32 65535>
}
@@ -517,31 +601,31 @@ define <2 x i32> @movi1d() {
; CHECK-NOFP16-SD-LABEL: movi1d:
; CHECK-NOFP16-SD: // %bb.0:
; CHECK-NOFP16-SD-NEXT: movi d1, #0x00ffffffff0000
-; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI53_0
-; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0]
+; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI57_0
+; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
; CHECK-NOFP16-SD-NEXT: b test_movi1d
;
; CHECK-FP16-SD-LABEL: movi1d:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: movi d1, #0x00ffffffff0000
-; CHECK-FP16-SD-NEXT: adrp x8, .LCPI53_0
-; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0]
+; CHECK-FP16-SD-NEXT: adrp x8, .LCPI57_0
+; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
; CHECK-FP16-SD-NEXT: b test_movi1d
;
; CHECK-NOFP16-GI-LABEL: movi1d:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI53_1
-; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI53_0
-; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1]
-; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0]
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI57_1
+; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI57_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1]
+; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0]
; CHECK-NOFP16-GI-NEXT: b test_movi1d
;
; CHECK-FP16-GI-LABEL: movi1d:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI53_1
-; CHECK-FP16-GI-NEXT: adrp x9, .LCPI53_0
-; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1]
-; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0]
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI57_1
+; CHECK-FP16-GI-NEXT: adrp x9, .LCPI57_0
+; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1]
+; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0]
; CHECK-FP16-GI-NEXT: b test_movi1d
%1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>)
ret <2 x i32> %1
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
[AArch64][GlobalISel] Correct instructions for 64bit fneg constant vectors. This code was assuming that the vectors were 128bit. Add handling for 64bit vectors. Some of the tests do not apply yet due to not matching non-splat vectors.
ed12ca8 to
5f07b49
Compare
sjoerdmeijer
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like a good and straightforward fix to me, but I haven't reviewed GISel code before, so maybe we can get a second opinion before merging this.
Just a question: I think this is fixing #166400, but that isn't mentioned in the description. Do you expect some more work before that can be closed?
Yep - forgot to add that. Thanks! |
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/21996. Here is the relevant piece of the build log for reference. |
This code was assuming that the vectors were 128bit. Add handling for 64bit vectors. Some of the tests do not apply yet due to not matching non-splat vectors.
Fixes #166400