Skip to content

Conversation

HolyMolyCowMan
Copy link
Contributor

Re-landing #160902.

I somewhat hastily reverted due to build failures on Windows. Looks to be some kind of caching issue as the build later passed without me realising.

@llvmbot
Copy link
Member

llvmbot commented Oct 10, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: Ryan Cowan (HolyMolyCowMan)

Changes

Re-landing #160902.

I somewhat hastily reverted due to build failures on Windows. Looks to be some kind of caching issue as the build later passed without me realising.


Patch is 291.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162829.diff

18 Files Affected:

  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+2)
  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+1)
  • (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll (+4-6)
  • (modified) llvm/test/CodeGen/AArch64/f16-instructions.ll (+8-10)
  • (modified) llvm/test/CodeGen/AArch64/fcvt-fixed.ll (+176-385)
  • (modified) llvm/test/CodeGen/AArch64/frem-power2.ll (+1-2)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll (+11-41)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll (+6-24)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll (+1501-1716)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll (+7-10)
  • (modified) llvm/test/CodeGen/AMDGPU/fmed3.ll (+18-28)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.exp.ll (+6-9)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.exp10.ll (+6-9)
  • (modified) llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll (+5-9)
  • (modified) llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll (+32-44)
  • (modified) llvm/test/CodeGen/AMDGPU/maximumnum.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/minimumnum.ll (+1-2)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2b7a5ead2cd3..3d21f522e97ce 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -695,6 +695,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
 def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
 def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
 def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
+def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
 
 // Fold constant zero int to fp conversions.
 class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -713,6 +714,7 @@ def constant_fold_fp_ops : GICombineGroup<[
   constant_fold_fsqrt,
   constant_fold_flog2,
   constant_fold_fptrunc,
+  constant_fold_fpext,
   itof_const_zero_fold_si,
   itof_const_zero_fold_ui
 ]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index fa0ccd625b504..152c508585968 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
     Result.clearSign();
     return Result;
   }
+  case TargetOpcode::G_FPEXT:
   case TargetOpcode::G_FPTRUNC: {
     bool Unused;
     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 639ddcba28468..ecaeff77fcb4b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -350,7 +350,7 @@ def AArch64PostLegalizerLowering
 // Post-legalization combines which are primarily optimizations.
 def AArch64PostLegalizerCombiner
     : GICombiner<"AArch64PostLegalizerCombinerImpl",
-                       [copy_prop, cast_of_cast_combines, 
+                       [copy_prop, cast_of_cast_combines, constant_fold_fp_ops, 
                         buildvector_of_truncate, integer_of_truncate,
                         mutate_anyext_to_zext, combines_for_extload, 
                         combine_indexed_load_store, sext_trunc_sextload,
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index 322a96aca5db2..e8e563135acc5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,14 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
 ;
 ; GISEL-LABEL: postidx32_shalf:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    movi d1, #0000000000000000
-; GISEL-NEXT:    ldr h2, [x0], #4
+; GISEL-NEXT:    ldr h1, [x0], #4
 ; GISEL-NEXT:    ; kill: def $h0 killed $h0 def $s0
 ; GISEL-NEXT:    fmov w9, s0
-; GISEL-NEXT:    fcvt s3, h2
-; GISEL-NEXT:    fmov w8, s2
-; GISEL-NEXT:    fcvt s1, h1
-; GISEL-NEXT:    fcmp s3, s1
+; GISEL-NEXT:    fcvt s2, h1
+; GISEL-NEXT:    fmov w8, s1
+; GISEL-NEXT:    fcmp s2, #0.0
 ; GISEL-NEXT:    csel w8, w8, w9, mi
 ; GISEL-NEXT:    strh w8, [x1]
 ; GISEL-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index b234ef7a5ff8b..085170c7ba381 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,16 @@ define void @test_fccmp(half %in, ptr %out) {
 ;
 ; CHECK-CVT-GI-LABEL: test_fccmp:
 ; CHECK-CVT-GI:       // %bb.0:
-; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_0
 ; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT:    fcvt s2, h0
-; CHECK-CVT-GI-NEXT:    ldr h1, [x8, :lo12:.LCPI29_0]
-; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_1
-; CHECK-CVT-GI-NEXT:    ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT:    fcvt s1, h0
+; CHECK-CVT-GI-NEXT:    fmov s2, #5.00000000
+; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_0
+; CHECK-CVT-GI-NEXT:    fmov s3, #8.00000000
+; CHECK-CVT-GI-NEXT:    fcmp s1, s2
+; CHECK-CVT-GI-NEXT:    ldr h2, [x8, :lo12:.LCPI29_0]
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
-; CHECK-CVT-GI-NEXT:    fcvt s3, h1
-; CHECK-CVT-GI-NEXT:    fmov w9, s1
-; CHECK-CVT-GI-NEXT:    fcvt s4, h4
-; CHECK-CVT-GI-NEXT:    fcmp s2, s3
-; CHECK-CVT-GI-NEXT:    fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT:    fmov w9, s2
+; CHECK-CVT-GI-NEXT:    fccmp s1, s3, #4, mi
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, gt
 ; CHECK-CVT-GI-NEXT:    strh w8, [x0]
 ; CHECK-CVT-GI-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 7409bfb91454c..743d1604388de 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -149,33 +149,21 @@ define i64 @fcvtzs_f64_i64_64(double %dbl) {
 }
 
 define i32 @fcvtzs_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI8_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI8_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI8_0
@@ -189,33 +177,21 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
 }
 
 define i32 @fcvtzs_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI9_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI9_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI9_0
@@ -229,33 +205,21 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
 }
 
 define i64 @fcvtzs_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI10_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI10_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI10_0
@@ -269,33 +233,21 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
 }
 
 define i64 @fcvtzs_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI11_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI11_0
@@ -453,33 +405,21 @@ define i64 @fcvtzu_f64_i64_64(double %dbl) {
 }
 
 define i32 @fcvtzu_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI20_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI20_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI20_0
@@ -493,33 +433,21 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
 }
 
 define i32 @fcvtzu_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI21_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI21_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI21_0
@@ -533,33 +461,21 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
 }
 
 define i64 @fcvtzu_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i64_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI22_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI22_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI22_0
@@ -573,33 +489,21 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
 }
 
 define i64 @fcvtzu_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i64_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI23_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI23_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI23_0
@@ -774,13 +678,11 @@ define half @scvtf_f16_i32_7(i32 %int) {
 ;
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    scvtf s0, w0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI32_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NO16-NEXT:    scvtf s1, w0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -814,13 +716,11 @@ define half @scvtf_f16_i32_15(i32 %int) {
 ;
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    scvtf s0, w0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI33_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NO16-NEXT:    scvtf s1, w0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #71, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -854,13 +754,11 @@ define half @scvtf_f16_i64_7(i64 %long) {
 ;
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    scvtf s0, x0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI34_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NO16-NEXT:    scvtf s1, x0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -894,13 +792,11 @@ define half @scvtf_f16_i64_15(i64 %long) {
 ;
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    scvtf s0, x0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI35_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NO16-NEXT:    scvtf s1, x0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #71, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -1078,13 +974,11 @@ define half @ucvtf_f16_i32_7(i32 %int) {
 ;
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    ucvtf s0, w0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI44_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NO16-NEXT:    ucvtf s1, w0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -1118,13 +1012,11 @@ define half @ucvtf_f16_i32_15(i32 %int) {
 ;
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Oct 10, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Ryan Cowan (HolyMolyCowMan)

Changes

Re-landing #160902.

I somewhat hastily reverted due to build failures on Windows. Looks to be some kind of caching issue as the build later passed without me realising.


Patch is 291.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162829.diff

18 Files Affected:

  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+2)
  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+1)
  • (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll (+4-6)
  • (modified) llvm/test/CodeGen/AArch64/f16-instructions.ll (+8-10)
  • (modified) llvm/test/CodeGen/AArch64/fcvt-fixed.ll (+176-385)
  • (modified) llvm/test/CodeGen/AArch64/frem-power2.ll (+1-2)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll (+11-41)
  • (modified) llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll (+6-24)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll (+1501-1716)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll (+7-10)
  • (modified) llvm/test/CodeGen/AMDGPU/fmed3.ll (+18-28)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.exp.ll (+6-9)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.exp10.ll (+6-9)
  • (modified) llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll (+5-9)
  • (modified) llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll (+32-44)
  • (modified) llvm/test/CodeGen/AMDGPU/maximumnum.ll (+1-2)
  • (modified) llvm/test/CodeGen/AMDGPU/minimumnum.ll (+1-2)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2b7a5ead2cd3..3d21f522e97ce 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -695,6 +695,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
 def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
 def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
 def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
+def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
 
 // Fold constant zero int to fp conversions.
 class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -713,6 +714,7 @@ def constant_fold_fp_ops : GICombineGroup<[
   constant_fold_fsqrt,
   constant_fold_flog2,
   constant_fold_fptrunc,
+  constant_fold_fpext,
   itof_const_zero_fold_si,
   itof_const_zero_fold_ui
 ]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index fa0ccd625b504..152c508585968 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
     Result.clearSign();
     return Result;
   }
+  case TargetOpcode::G_FPEXT:
   case TargetOpcode::G_FPTRUNC: {
     bool Unused;
     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 639ddcba28468..ecaeff77fcb4b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -350,7 +350,7 @@ def AArch64PostLegalizerLowering
 // Post-legalization combines which are primarily optimizations.
 def AArch64PostLegalizerCombiner
     : GICombiner<"AArch64PostLegalizerCombinerImpl",
-                       [copy_prop, cast_of_cast_combines, 
+                       [copy_prop, cast_of_cast_combines, constant_fold_fp_ops, 
                         buildvector_of_truncate, integer_of_truncate,
                         mutate_anyext_to_zext, combines_for_extload, 
                         combine_indexed_load_store, sext_trunc_sextload,
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index 322a96aca5db2..e8e563135acc5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,14 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
 ;
 ; GISEL-LABEL: postidx32_shalf:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    movi d1, #0000000000000000
-; GISEL-NEXT:    ldr h2, [x0], #4
+; GISEL-NEXT:    ldr h1, [x0], #4
 ; GISEL-NEXT:    ; kill: def $h0 killed $h0 def $s0
 ; GISEL-NEXT:    fmov w9, s0
-; GISEL-NEXT:    fcvt s3, h2
-; GISEL-NEXT:    fmov w8, s2
-; GISEL-NEXT:    fcvt s1, h1
-; GISEL-NEXT:    fcmp s3, s1
+; GISEL-NEXT:    fcvt s2, h1
+; GISEL-NEXT:    fmov w8, s1
+; GISEL-NEXT:    fcmp s2, #0.0
 ; GISEL-NEXT:    csel w8, w8, w9, mi
 ; GISEL-NEXT:    strh w8, [x1]
 ; GISEL-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index b234ef7a5ff8b..085170c7ba381 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,16 @@ define void @test_fccmp(half %in, ptr %out) {
 ;
 ; CHECK-CVT-GI-LABEL: test_fccmp:
 ; CHECK-CVT-GI:       // %bb.0:
-; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_0
 ; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT:    fcvt s2, h0
-; CHECK-CVT-GI-NEXT:    ldr h1, [x8, :lo12:.LCPI29_0]
-; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_1
-; CHECK-CVT-GI-NEXT:    ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT:    fcvt s1, h0
+; CHECK-CVT-GI-NEXT:    fmov s2, #5.00000000
+; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_0
+; CHECK-CVT-GI-NEXT:    fmov s3, #8.00000000
+; CHECK-CVT-GI-NEXT:    fcmp s1, s2
+; CHECK-CVT-GI-NEXT:    ldr h2, [x8, :lo12:.LCPI29_0]
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
-; CHECK-CVT-GI-NEXT:    fcvt s3, h1
-; CHECK-CVT-GI-NEXT:    fmov w9, s1
-; CHECK-CVT-GI-NEXT:    fcvt s4, h4
-; CHECK-CVT-GI-NEXT:    fcmp s2, s3
-; CHECK-CVT-GI-NEXT:    fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT:    fmov w9, s2
+; CHECK-CVT-GI-NEXT:    fccmp s1, s3, #4, mi
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, gt
 ; CHECK-CVT-GI-NEXT:    strh w8, [x0]
 ; CHECK-CVT-GI-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 7409bfb91454c..743d1604388de 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -149,33 +149,21 @@ define i64 @fcvtzs_f64_i64_64(double %dbl) {
 }
 
 define i32 @fcvtzs_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI8_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI8_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI8_0
@@ -189,33 +177,21 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
 }
 
 define i32 @fcvtzs_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI9_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI9_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI9_0
@@ -229,33 +205,21 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
 }
 
 define i64 @fcvtzs_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI10_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI10_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI10_0
@@ -269,33 +233,21 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
 }
 
 define i64 @fcvtzs_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI11_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI11_0
@@ -453,33 +405,21 @@ define i64 @fcvtzu_f64_i64_64(double %dbl) {
 }
 
 define i32 @fcvtzu_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI20_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI20_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI20_0
@@ -493,33 +433,21 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
 }
 
 define i32 @fcvtzu_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI21_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI21_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI21_0
@@ -533,33 +461,21 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
 }
 
 define i64 @fcvtzu_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i64_7:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_7:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0, #7
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI22_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI22_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI22_0
@@ -573,33 +489,21 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
 }
 
 define i64 @fcvtzu_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-SD-NO16:       // %bb.0:
-; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT:    fcvt h0, s0
-; CHECK-SD-NO16-NEXT:    fcvt s0, h0
-; CHECK-SD-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-SD-NO16-NEXT:    ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i64_15:
+; CHECK-NO16:       // %bb.0:
+; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-NO16-NEXT:    fcvt h0, s0
+; CHECK-NO16-NEXT:    fcvt s0, h0
+; CHECK-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-NO16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_15:
 ; CHECK-SD-FP16:       // %bb.0:
 ; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0, #15
 ; CHECK-SD-FP16-NEXT:    ret
 ;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI23_0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI23_0]
-; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-GI-NO16-NEXT:    ret
-;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI23_0
@@ -774,13 +678,11 @@ define half @scvtf_f16_i32_7(i32 %int) {
 ;
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    scvtf s0, w0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI32_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NO16-NEXT:    scvtf s1, w0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -814,13 +716,11 @@ define half @scvtf_f16_i32_15(i32 %int) {
 ;
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    scvtf s0, w0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI33_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NO16-NEXT:    scvtf s1, w0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #71, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -854,13 +754,11 @@ define half @scvtf_f16_i64_7(i64 %long) {
 ;
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    scvtf s0, x0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI34_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NO16-NEXT:    scvtf s1, x0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -894,13 +792,11 @@ define half @scvtf_f16_i64_15(i64 %long) {
 ;
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    scvtf s0, x0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI35_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NO16-NEXT:    scvtf s1, x0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #71, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -1078,13 +974,11 @@ define half @ucvtf_f16_i32_7(i32 %int) {
 ;
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    ucvtf s0, w0
-; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI44_0
-; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NO16-NEXT:    ucvtf s1, w0
+; CHECK-GI-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NO16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
-; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fdiv s0, s1, s0
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    ret
 ;
@@ -1118,13 +1012,11 @@ define half @ucvtf_f16_i32_15(i32 %int) {
 ;
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
...
[truncated]

@arsenm arsenm enabled auto-merge (squash) October 10, 2025 11:55
@arsenm arsenm merged commit b298421 into llvm:main Oct 10, 2025
24 of 25 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants