Skip to content

Conversation

@davemgreen
Copy link
Collaborator

We can generate fpr->fpr instructions for G_SITOFP and G_UITOFP. It was
previously marking the instructions as FPR but then generating GPR instructions
and introducing a copy.

@llvmbot
Copy link
Member

llvmbot commented Aug 20, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

Changes

We can generate fpr->fpr instructions for G_SITOFP and G_UITOFP. It was
previously marking the instructions as FPR but then generating GPR instructions
and introducing a copy.


Full diff: https://github.com/llvm/llvm-project/pull/154489.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (-93)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (+18-3)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h (+4)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir (-25)
  • (modified) llvm/test/CodeGen/AArch64/itofp.ll (+27-108)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index ee34a85a5b507..0bceb322726d1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1102,82 +1102,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   return true;
 }
 
-static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
-  if (!DstTy.isScalar() || !SrcTy.isScalar())
-    return GenericOpc;
-
-  const unsigned DstSize = DstTy.getSizeInBits();
-  const unsigned SrcSize = SrcTy.getSizeInBits();
-
-  switch (DstSize) {
-  case 32:
-    switch (SrcSize) {
-    case 32:
-      switch (GenericOpc) {
-      case TargetOpcode::G_SITOFP:
-        return AArch64::SCVTFUWSri;
-      case TargetOpcode::G_UITOFP:
-        return AArch64::UCVTFUWSri;
-      case TargetOpcode::G_FPTOSI:
-        return AArch64::FCVTZSUWSr;
-      case TargetOpcode::G_FPTOUI:
-        return AArch64::FCVTZUUWSr;
-      default:
-        return GenericOpc;
-      }
-    case 64:
-      switch (GenericOpc) {
-      case TargetOpcode::G_SITOFP:
-        return AArch64::SCVTFUXSri;
-      case TargetOpcode::G_UITOFP:
-        return AArch64::UCVTFUXSri;
-      case TargetOpcode::G_FPTOSI:
-        return AArch64::FCVTZSUWDr;
-      case TargetOpcode::G_FPTOUI:
-        return AArch64::FCVTZUUWDr;
-      default:
-        return GenericOpc;
-      }
-    default:
-      return GenericOpc;
-    }
-  case 64:
-    switch (SrcSize) {
-    case 32:
-      switch (GenericOpc) {
-      case TargetOpcode::G_SITOFP:
-        return AArch64::SCVTFUWDri;
-      case TargetOpcode::G_UITOFP:
-        return AArch64::UCVTFUWDri;
-      case TargetOpcode::G_FPTOSI:
-        return AArch64::FCVTZSUXSr;
-      case TargetOpcode::G_FPTOUI:
-        return AArch64::FCVTZUUXSr;
-      default:
-        return GenericOpc;
-      }
-    case 64:
-      switch (GenericOpc) {
-      case TargetOpcode::G_SITOFP:
-        return AArch64::SCVTFUXDri;
-      case TargetOpcode::G_UITOFP:
-        return AArch64::UCVTFUXDri;
-      case TargetOpcode::G_FPTOSI:
-        return AArch64::FCVTZSUXDr;
-      case TargetOpcode::G_FPTOUI:
-        return AArch64::FCVTZUUXDr;
-      default:
-        return GenericOpc;
-      }
-    default:
-      return GenericOpc;
-    }
-  default:
-    return GenericOpc;
-  };
-  return GenericOpc;
-}
-
 MachineInstr *
 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                        Register False, AArch64CC::CondCode CC,
@@ -3524,23 +3448,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     return true;
   }
 
-  case TargetOpcode::G_SITOFP:
-  case TargetOpcode::G_UITOFP:
-  case TargetOpcode::G_FPTOSI:
-  case TargetOpcode::G_FPTOUI: {
-    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
-              SrcTy = MRI.getType(I.getOperand(1).getReg());
-    const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
-    if (NewOpc == Opcode)
-      return false;
-
-    I.setDesc(TII.get(NewOpc));
-    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-    I.setFlags(MachineInstr::NoFPExcept);
-
-    return true;
-  }
-
   case TargetOpcode::G_FREEZE:
     return selectCopy(I, TII, MRI, TRI, RBI);
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 31954e7954c03..6311804105239 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -618,6 +618,19 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
   return hasFPConstraints(MI, MRI, TRI, Depth);
 }
 
+bool AArch64RegisterBankInfo::prefersFPUse(const MachineInstr &MI,
+                                            const MachineRegisterInfo &MRI,
+                                            const TargetRegisterInfo &TRI,
+                                            unsigned Depth) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_SITOFP:
+  case TargetOpcode::G_UITOFP:
+    return MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() ==
+           MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+  }
+  return onlyDefinesFP(MI, MRI, TRI, Depth);
+}
+
 bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
   // GMemOperation because we also want to match indexed loads.
   auto *MemOp = cast<GMemOperation>(&MI);
@@ -826,7 +839,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // Integer to FP conversions don't necessarily happen between GPR -> FPR
     // regbanks. They can also be done within an FPR register.
     Register SrcReg = MI.getOperand(1).getReg();
-    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
+    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank &&
+        MRI.getType(SrcReg).getSizeInBits() ==
+            MRI.getType(MI.getOperand(0).getReg()).getSizeInBits())
       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
     else
       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
@@ -895,13 +910,13 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
                  // instruction.
                  //
                  // Int->FP conversion operations are also captured in
-                 // onlyDefinesFP().
+                 // prefersFPUse().
 
                  if (isPHIWithFPConstraints(UseMI, MRI, TRI))
                    return true;
 
                  return onlyUsesFP(UseMI, MRI, TRI) ||
-                        onlyDefinesFP(UseMI, MRI, TRI);
+                        prefersFPUse(UseMI, MRI, TRI);
                }))
       OpRegBankIdx[0] = PMI_FirstFPR;
     break;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index 3abbc1b68b5be..6c30b10401e44 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -139,6 +139,10 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
   bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                      const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
 
+  /// \returns true if \p MI can take both fpr and gpr uses, but prefers fp.
+  bool prefersFPUse(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
   /// \returns true if the load \p MI is likely loading from a floating-point
   /// type.
   bool isLoadFromFPType(const MachineInstr &MI) const;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
index 0ee0d9587e88b..a58492114003a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
@@ -357,31 +357,6 @@ body:             |
     $d0 = COPY %1(s64)
 ...
 
----
-name:            sitofp_s64_s32_fpr_both
-legalized:       true
-regBankSelected: true
-
-registers:
-  - { id: 0, class: fpr }
-  - { id: 1, class: fpr }
-
-body:             |
-  bb.0:
-    liveins: $s0
-
-    ; CHECK-LABEL: name: sitofp_s64_s32_fpr_both
-    ; CHECK: liveins: $s0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
-    ; CHECK-NEXT: [[SCVTFUWDri:%[0-9]+]]:fpr64 = nofpexcept SCVTFUWDri [[COPY1]]
-    ; CHECK-NEXT: $d0 = COPY [[SCVTFUWDri]]
-    %0(s32) = COPY $s0
-    %1(s64) = G_SITOFP %0
-    $d0 = COPY %1(s64)
-...
-
 ---
 name:            sitofp_s64_s64_fpr
 legalized:       true
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 84c1b97a34e88..caf87a13f283b 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -4,11 +4,6 @@
 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16,CHECK-NOFP16-GI
 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-GI
 
-; CHECK-FP16-GI:       warning: Instruction selection used fallback path for stofp_load_i64_f16
-; CHECK-FP16-GI-NEXT:  warning: Instruction selection used fallback path for utofp_load_i64_f16
-; CHECK-FP16-GI-NEXT:  warning: Instruction selection used fallback path for stofp_load_i32_f16
-; CHECK-FP16-GI-NEXT:  warning: Instruction selection used fallback path for utofp_load_i32_f16
-
 define double @stofp_i64_f64(i64 %a) {
 ; CHECK-LABEL: stofp_i64_f64:
 ; CHECK:       // %bb.0: // %entry
@@ -844,31 +839,11 @@ entry:
 }
 
 define double @stofp_load_i32_f64(ptr %p) {
-; CHECK-NOFP16-SD-LABEL: stofp_load_i32_f64:
-; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    ldr w8, [x0]
-; CHECK-NOFP16-SD-NEXT:    scvtf d0, w8
-; CHECK-NOFP16-SD-NEXT:    ret
-;
-; CHECK-FP16-SD-LABEL: stofp_load_i32_f64:
-; CHECK-FP16-SD:       // %bb.0: // %entry
-; CHECK-FP16-SD-NEXT:    ldr w8, [x0]
-; CHECK-FP16-SD-NEXT:    scvtf d0, w8
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: stofp_load_i32_f64:
-; CHECK-NOFP16-GI:       // %bb.0: // %entry
-; CHECK-NOFP16-GI-NEXT:    ldr s0, [x0]
-; CHECK-NOFP16-GI-NEXT:    fmov w8, s0
-; CHECK-NOFP16-GI-NEXT:    scvtf d0, w8
-; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: stofp_load_i32_f64:
-; CHECK-FP16-GI:       // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT:    ldr s0, [x0]
-; CHECK-FP16-GI-NEXT:    fmov w8, s0
-; CHECK-FP16-GI-NEXT:    scvtf d0, w8
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: stofp_load_i32_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:    scvtf d0, w8
+; CHECK-NEXT:    ret
 entry:
   %a = load i32, ptr %p
   %c = sitofp i32 %a to double
@@ -936,31 +911,11 @@ entry:
 }
 
 define float @stofp_load_i64_f32(ptr %p) {
-; CHECK-NOFP16-SD-LABEL: stofp_load_i64_f32:
-; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    ldr x8, [x0]
-; CHECK-NOFP16-SD-NEXT:    scvtf s0, x8
-; CHECK-NOFP16-SD-NEXT:    ret
-;
-; CHECK-FP16-SD-LABEL: stofp_load_i64_f32:
-; CHECK-FP16-SD:       // %bb.0: // %entry
-; CHECK-FP16-SD-NEXT:    ldr x8, [x0]
-; CHECK-FP16-SD-NEXT:    scvtf s0, x8
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: stofp_load_i64_f32:
-; CHECK-NOFP16-GI:       // %bb.0: // %entry
-; CHECK-NOFP16-GI-NEXT:    ldr d0, [x0]
-; CHECK-NOFP16-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP16-GI-NEXT:    scvtf s0, x8
-; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: stofp_load_i64_f32:
-; CHECK-FP16-GI:       // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT:    ldr d0, [x0]
-; CHECK-FP16-GI-NEXT:    fmov x8, d0
-; CHECK-FP16-GI-NEXT:    scvtf s0, x8
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: stofp_load_i64_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    scvtf s0, x8
+; CHECK-NEXT:    ret
 entry:
   %a = load i64, ptr %p
   %c = sitofp i64 %a to float
@@ -968,31 +923,11 @@ entry:
 }
 
 define float @utofp_load_i64_f32(ptr %p) {
-; CHECK-NOFP16-SD-LABEL: utofp_load_i64_f32:
-; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    ldr x8, [x0]
-; CHECK-NOFP16-SD-NEXT:    ucvtf s0, x8
-; CHECK-NOFP16-SD-NEXT:    ret
-;
-; CHECK-FP16-SD-LABEL: utofp_load_i64_f32:
-; CHECK-FP16-SD:       // %bb.0: // %entry
-; CHECK-FP16-SD-NEXT:    ldr x8, [x0]
-; CHECK-FP16-SD-NEXT:    ucvtf s0, x8
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: utofp_load_i64_f32:
-; CHECK-NOFP16-GI:       // %bb.0: // %entry
-; CHECK-NOFP16-GI-NEXT:    ldr d0, [x0]
-; CHECK-NOFP16-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP16-GI-NEXT:    ucvtf s0, x8
-; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: utofp_load_i64_f32:
-; CHECK-FP16-GI:       // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT:    ldr d0, [x0]
-; CHECK-FP16-GI-NEXT:    fmov x8, d0
-; CHECK-FP16-GI-NEXT:    ucvtf s0, x8
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: utofp_load_i64_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    ucvtf s0, x8
+; CHECK-NEXT:    ret
 entry:
   %a = load i64, ptr %p
   %c = uitofp i64 %a to float
@@ -1072,26 +1007,18 @@ entry:
 }
 
 define half @stofp_load_i64_f16(ptr %p) {
-; CHECK-NOFP16-SD-LABEL: stofp_load_i64_f16:
-; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    ldr x8, [x0]
-; CHECK-NOFP16-SD-NEXT:    scvtf s0, x8
-; CHECK-NOFP16-SD-NEXT:    fcvt h0, s0
-; CHECK-NOFP16-SD-NEXT:    ret
+; CHECK-NOFP16-LABEL: stofp_load_i64_f16:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    ldr x8, [x0]
+; CHECK-NOFP16-NEXT:    scvtf s0, x8
+; CHECK-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: stofp_load_i64_f16:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    ldr x8, [x0]
 ; CHECK-FP16-NEXT:    scvtf h0, x8
 ; CHECK-FP16-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: stofp_load_i64_f16:
-; CHECK-NOFP16-GI:       // %bb.0: // %entry
-; CHECK-NOFP16-GI-NEXT:    ldr d0, [x0]
-; CHECK-NOFP16-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP16-GI-NEXT:    scvtf s0, x8
-; CHECK-NOFP16-GI-NEXT:    fcvt h0, s0
-; CHECK-NOFP16-GI-NEXT:    ret
 entry:
   %a = load i64, ptr %p
   %c = sitofp i64 %a to half
@@ -1099,26 +1026,18 @@ entry:
 }
 
 define half @utofp_load_i64_f16(ptr %p) {
-; CHECK-NOFP16-SD-LABEL: utofp_load_i64_f16:
-; CHECK-NOFP16-SD:       // %bb.0: // %entry
-; CHECK-NOFP16-SD-NEXT:    ldr x8, [x0]
-; CHECK-NOFP16-SD-NEXT:    ucvtf s0, x8
-; CHECK-NOFP16-SD-NEXT:    fcvt h0, s0
-; CHECK-NOFP16-SD-NEXT:    ret
+; CHECK-NOFP16-LABEL: utofp_load_i64_f16:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    ldr x8, [x0]
+; CHECK-NOFP16-NEXT:    ucvtf s0, x8
+; CHECK-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: utofp_load_i64_f16:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    ldr x8, [x0]
 ; CHECK-FP16-NEXT:    ucvtf h0, x8
 ; CHECK-FP16-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: utofp_load_i64_f16:
-; CHECK-NOFP16-GI:       // %bb.0: // %entry
-; CHECK-NOFP16-GI-NEXT:    ldr d0, [x0]
-; CHECK-NOFP16-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP16-GI-NEXT:    ucvtf s0, x8
-; CHECK-NOFP16-GI-NEXT:    fcvt h0, s0
-; CHECK-NOFP16-GI-NEXT:    ret
 entry:
   %a = load i64, ptr %p
   %c = uitofp i64 %a to half

@github-actions
Copy link

github-actions bot commented Aug 20, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@davemgreen davemgreen force-pushed the gh-gi-removeitofpselect branch from 91f5a2e to b168ba8 Compare August 20, 2025 08:30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
const AArch64RegisterInfo &TRI, unsigned Depth = 0) const;

TRI should probably just be a field in the RegisterBankInfo

We can generate fpr->fpr instructions for G_SITOFP and G_UITOFP. It was
previously marking the instructions as FPR but then generating GPR instructions
and introducing a copy.
@davemgreen davemgreen force-pushed the gh-gi-removeitofpselect branch from b168ba8 to 1e02f86 Compare August 21, 2025 16:05
Copy link
Contributor

@aemerson aemerson left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, thanks!

@davemgreen davemgreen force-pushed the gh-gi-removeitofpselect branch from 1e02f86 to 94bd0a8 Compare August 21, 2025 17:14
@davemgreen davemgreen enabled auto-merge (squash) August 21, 2025 17:14
@davemgreen davemgreen merged commit 1a09581 into llvm:main Aug 21, 2025
9 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants