-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][GlobalISel] Add G_FPEXT(G_FCONSTANT) folding #162829
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: Ryan Cowan (HolyMolyCowMan) ChangesRe-landing #160902. I somewhat hastily reverted due to build failures on Windows. Looks to be some kind of caching issue as the build later passed without me realising. Patch is 291.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162829.diff 18 Files Affected:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2b7a5ead2cd3..3d21f522e97ce 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -695,6 +695,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
+def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
// Fold constant zero int to fp conversions.
class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -713,6 +714,7 @@ def constant_fold_fp_ops : GICombineGroup<[
constant_fold_fsqrt,
constant_fold_flog2,
constant_fold_fptrunc,
+ constant_fold_fpext,
itof_const_zero_fold_si,
itof_const_zero_fold_ui
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index fa0ccd625b504..152c508585968 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
Result.clearSign();
return Result;
}
+ case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC: {
bool Unused;
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 639ddcba28468..ecaeff77fcb4b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -350,7 +350,7 @@ def AArch64PostLegalizerLowering
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
- [copy_prop, cast_of_cast_combines,
+ [copy_prop, cast_of_cast_combines, constant_fold_fp_ops,
buildvector_of_truncate, integer_of_truncate,
mutate_anyext_to_zext, combines_for_extload,
combine_indexed_load_store, sext_trunc_sextload,
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index 322a96aca5db2..e8e563135acc5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,14 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
-; GISEL-NEXT: movi d1, #0000000000000000
-; GISEL-NEXT: ldr h2, [x0], #4
+; GISEL-NEXT: ldr h1, [x0], #4
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h2
-; GISEL-NEXT: fmov w8, s2
-; GISEL-NEXT: fcvt s1, h1
-; GISEL-NEXT: fcmp s3, s1
+; GISEL-NEXT: fcvt s2, h1
+; GISEL-NEXT: fmov w8, s1
+; GISEL-NEXT: fcmp s2, #0.0
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index b234ef7a5ff8b..085170c7ba381 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,16 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
-; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT: fcvt s1, h0
+; CHECK-CVT-GI-NEXT: fmov s2, #5.00000000
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
+; CHECK-CVT-GI-NEXT: fmov s3, #8.00000000
+; CHECK-CVT-GI-NEXT: fcmp s1, s2
+; CHECK-CVT-GI-NEXT: ldr h2, [x8, :lo12:.LCPI29_0]
; CHECK-CVT-GI-NEXT: fmov w8, s0
-; CHECK-CVT-GI-NEXT: fcvt s3, h1
-; CHECK-CVT-GI-NEXT: fmov w9, s1
-; CHECK-CVT-GI-NEXT: fcvt s4, h4
-; CHECK-CVT-GI-NEXT: fcmp s2, s3
-; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT: fmov w9, s2
+; CHECK-CVT-GI-NEXT: fccmp s1, s3, #4, mi
; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 7409bfb91454c..743d1604388de 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -149,33 +149,21 @@ define i64 @fcvtzs_f64_i64_64(double %dbl) {
}
define i32 @fcvtzs_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI8_0
@@ -189,33 +177,21 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
}
define i32 @fcvtzs_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI9_0
@@ -229,33 +205,21 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
}
define i64 @fcvtzs_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI10_0
@@ -269,33 +233,21 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
}
define i64 @fcvtzs_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI11_0
@@ -453,33 +405,21 @@ define i64 @fcvtzu_f64_i64_64(double %dbl) {
}
define i32 @fcvtzu_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI20_0
@@ -493,33 +433,21 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
}
define i32 @fcvtzu_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI21_0
@@ -533,33 +461,21 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
}
define i64 @fcvtzu_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i64_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI22_0
@@ -573,33 +489,21 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
}
define i64 @fcvtzu_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i64_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI23_0
@@ -774,13 +678,11 @@ define half @scvtf_f16_i32_7(i32 %int) {
;
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NO16-NEXT: scvtf s1, w0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -814,13 +716,11 @@ define half @scvtf_f16_i32_15(i32 %int) {
;
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NO16-NEXT: scvtf s1, w0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #71, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -854,13 +754,11 @@ define half @scvtf_f16_i64_7(i64 %long) {
;
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NO16-NEXT: scvtf s1, x0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -894,13 +792,11 @@ define half @scvtf_f16_i64_15(i64 %long) {
;
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NO16-NEXT: scvtf s1, x0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #71, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -1078,13 +974,11 @@ define half @ucvtf_f16_i32_7(i32 %int) {
;
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NO16-NEXT: ucvtf s1, w0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -1118,13 +1012,11 @@ define half @ucvtf_f16_i32_15(i32 %int) {
;
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Ryan Cowan (HolyMolyCowMan) ChangesRe-landing #160902. I somewhat hastily reverted due to build failures on Windows. Looks to be some kind of caching issue as the build later passed without me realising. Patch is 291.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162829.diff 18 Files Affected:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2b7a5ead2cd3..3d21f522e97ce 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -695,6 +695,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
+def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
// Fold constant zero int to fp conversions.
class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -713,6 +714,7 @@ def constant_fold_fp_ops : GICombineGroup<[
constant_fold_fsqrt,
constant_fold_flog2,
constant_fold_fptrunc,
+ constant_fold_fpext,
itof_const_zero_fold_si,
itof_const_zero_fold_ui
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index fa0ccd625b504..152c508585968 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
Result.clearSign();
return Result;
}
+ case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC: {
bool Unused;
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 639ddcba28468..ecaeff77fcb4b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -350,7 +350,7 @@ def AArch64PostLegalizerLowering
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
- [copy_prop, cast_of_cast_combines,
+ [copy_prop, cast_of_cast_combines, constant_fold_fp_ops,
buildvector_of_truncate, integer_of_truncate,
mutate_anyext_to_zext, combines_for_extload,
combine_indexed_load_store, sext_trunc_sextload,
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index 322a96aca5db2..e8e563135acc5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,14 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
-; GISEL-NEXT: movi d1, #0000000000000000
-; GISEL-NEXT: ldr h2, [x0], #4
+; GISEL-NEXT: ldr h1, [x0], #4
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h2
-; GISEL-NEXT: fmov w8, s2
-; GISEL-NEXT: fcvt s1, h1
-; GISEL-NEXT: fcmp s3, s1
+; GISEL-NEXT: fcvt s2, h1
+; GISEL-NEXT: fmov w8, s1
+; GISEL-NEXT: fcmp s2, #0.0
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index b234ef7a5ff8b..085170c7ba381 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,16 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
-; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT: fcvt s1, h0
+; CHECK-CVT-GI-NEXT: fmov s2, #5.00000000
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
+; CHECK-CVT-GI-NEXT: fmov s3, #8.00000000
+; CHECK-CVT-GI-NEXT: fcmp s1, s2
+; CHECK-CVT-GI-NEXT: ldr h2, [x8, :lo12:.LCPI29_0]
; CHECK-CVT-GI-NEXT: fmov w8, s0
-; CHECK-CVT-GI-NEXT: fcvt s3, h1
-; CHECK-CVT-GI-NEXT: fmov w9, s1
-; CHECK-CVT-GI-NEXT: fcvt s4, h4
-; CHECK-CVT-GI-NEXT: fcmp s2, s3
-; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT: fmov w9, s2
+; CHECK-CVT-GI-NEXT: fccmp s1, s3, #4, mi
; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 7409bfb91454c..743d1604388de 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -149,33 +149,21 @@ define i64 @fcvtzs_f64_i64_64(double %dbl) {
}
define i32 @fcvtzs_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI8_0
@@ -189,33 +177,21 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
}
define i32 @fcvtzs_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI9_0
@@ -229,33 +205,21 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
}
define i64 @fcvtzs_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI10_0
@@ -269,33 +233,21 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
}
define i64 @fcvtzs_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI11_0
@@ -453,33 +405,21 @@ define i64 @fcvtzu_f64_i64_64(double %dbl) {
}
define i32 @fcvtzu_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI20_0
@@ -493,33 +433,21 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
}
define i32 @fcvtzu_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI21_0
@@ -533,33 +461,21 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
}
define i64 @fcvtzu_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i64_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI22_0
@@ -573,33 +489,21 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
}
define i64 @fcvtzu_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i64_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI23_0
@@ -774,13 +678,11 @@ define half @scvtf_f16_i32_7(i32 %int) {
;
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NO16-NEXT: scvtf s1, w0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -814,13 +716,11 @@ define half @scvtf_f16_i32_15(i32 %int) {
;
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NO16-NEXT: scvtf s1, w0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #71, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -854,13 +754,11 @@ define half @scvtf_f16_i64_7(i64 %long) {
;
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NO16-NEXT: scvtf s1, x0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -894,13 +792,11 @@ define half @scvtf_f16_i64_15(i64 %long) {
;
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NO16-NEXT: scvtf s1, x0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #71, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -1078,13 +974,11 @@ define half @ucvtf_f16_i32_7(i32 %int) {
;
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0
-; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NO16-NEXT: ucvtf s1, w0
+; CHECK-GI-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT: fcvt h1, s1
; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT: fdiv s0, s1, s0
; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: ret
;
@@ -1118,13 +1012,11 @@ define half @ucvtf_f16_i32_15(i32 %int) {
;
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
...
[truncated]
|
Re-landing #160902.
I somewhat hastily reverted due to build failures on Windows. Looks to be some kind of caching issue as the build later passed without me realising.