
Conversation

e-kud
Contributor

@e-kud e-kud commented May 27, 2025

We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.
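
The custom legalization round-trips the value through a stack temporary typed as the narrower of the two types, so an x87 extending load or truncating store performs the actual conversion. A condensed sketch of that body (simplified from legalizeFPExtAndTrunc in the diff below; the assert and the enclosing function are omitted):

  // Condensed from legalizeFPExtAndTrunc below. The stack slot uses the narrower
  // type, so G_FPEXT becomes a plain store plus an extending load and G_FPTRUNC
  // becomes a truncating store plus a plain load; the x87 fld/fst do the conversion.
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy;

  MachinePointerInfo PtrInfo;
  Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy);
  auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(),
                                               StackTyAlign, PtrInfo);

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = MIRBuilder.getMF();
  MIRBuilder.buildStore(SrcReg, StackTemp,
                        *MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                                 StackTy, StackTyAlign));
  MIRBuilder.buildLoad(DstReg, StackTemp,
                       *MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                                StackTy, StackTyAlign));
  MI.eraseFromParent();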

@llvmbot
Member

llvmbot commented May 27, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Evgenii Kudriashov (e-kud)

Changes

We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.


Full diff: https://github.com/llvm/llvm-project/pull/141611.diff

4 Files Affected:

  • (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp (+48-13)
  • (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.h (+3)
  • (removed) llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll (-12)
  • (added) llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll (+279)
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index f008cb1bea839..58215d4e00202 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -376,9 +376,15 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
     Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
                                      {s16, p0, s16, 1},
                                      {s32, p0, s32, 1},
-                                     {s80, p0, s80, 1},
                                      {p0, p0, p0, 1},
                                      {v4s8, p0, v4s8, 1}});
+
+    if (UseX87)
+      Action.legalForTypesWithMemDesc({{s80, p0, s32, 1},
+                                       {s80, p0, s64, 1},
+                                       {s32, p0, s80, 1},
+                                       {s64, p0, s80, 1},
+                                       {s80, p0, s80, 1}});
     if (Is64Bit)
       Action.legalForTypesWithMemDesc(
           {{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}});
@@ -476,18 +482,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
       .widenScalarToNextPow2(1);
 
   // fp conversions
-  getActionDefinitionsBuilder(G_FPEXT).legalIf([=](const LegalityQuery &Query) {
-    return (HasSSE2 && typePairInSet(0, 1, {{s64, s32}})(Query)) ||
-           (HasAVX && typePairInSet(0, 1, {{v4s64, v4s32}})(Query)) ||
-           (HasAVX512 && typePairInSet(0, 1, {{v8s64, v8s32}})(Query));
-  });
-
-  getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
-      [=](const LegalityQuery &Query) {
-        return (HasSSE2 && typePairInSet(0, 1, {{s32, s64}})(Query)) ||
-               (HasAVX && typePairInSet(0, 1, {{v4s32, v4s64}})(Query)) ||
-               (HasAVX512 && typePairInSet(0, 1, {{v8s32, v8s64}})(Query));
-      });
+  getActionDefinitionsBuilder(G_FPEXT)
+      .legalFor(HasSSE2, {{s64, s32}})
+      .legalFor(HasAVX, {{v4s64, v4s32}})
+      .legalFor(HasAVX512, {{v8s64, v8s32}})
+      .customFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}});
+
+  getActionDefinitionsBuilder(G_FPTRUNC)
+      .legalFor(HasSSE2, {{s32, s64}})
+      .legalFor(HasAVX, {{v4s32, v4s64}})
+      .legalFor(HasAVX512, {{v8s32, v8s64}})
+      .customFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}});
 
   getActionDefinitionsBuilder(G_SITOFP)
       .legalIf([=](const LegalityQuery &Query) {
@@ -671,6 +676,9 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
     return legalizeUITOFP(MI, MRI, Helper);
   case TargetOpcode::G_STORE:
     return legalizeNarrowingStore(MI, MRI, Helper);
+  case TargetOpcode::G_FPEXT:
+  case TargetOpcode::G_FPTRUNC:
+    return legalizeFPExtAndTrunc(MI, MRI, Helper);
   }
   llvm_unreachable("expected switch to return");
 }
@@ -781,6 +789,33 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
   return true;
 }
 
+bool X86LegalizerInfo::legalizeFPExtAndTrunc(MachineInstr &MI,
+                                             MachineRegisterInfo &MRI,
+                                             LegalizerHelper &Helper) const {
+  assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
+          MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
+         "Only G_FPEXT and G_FPTRUNC are expected");
+  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+  MachinePointerInfo PtrInfo;
+  LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy;
+  Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy);
+  auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(),
+                                               StackTyAlign, PtrInfo);
+
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineFunction &MF = MIRBuilder.getMF();
+  auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                                           StackTy, StackTyAlign);
+  MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO);
+
+  auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+                                          StackTy, StackTyAlign);
+  MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
   return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 54f776456397b..b224f3f46a2d5 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -48,6 +48,9 @@ class X86LegalizerInfo : public LegalizerInfo {
 
   bool legalizeNarrowingStore(MachineInstr &MI, MachineRegisterInfo &MRI,
                               LegalizerHelper &Helper) const;
+
+  bool legalizeFPExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                             LegalizerHelper &Helper) const;
 };
 } // namespace llvm
 #endif
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll
deleted file mode 100644
index 8501009e2915a..0000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK
-
-define double @test(float %a) {
-; CHECK-LABEL: test:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT:    retq
-entry:
-  %conv = fpext float %a to double
-  ret double %conv
-}
diff --git a/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll
new file mode 100644
index 0000000000000..0ad9b90806ce9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown                   -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,SDAG-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2    -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,SDAG-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx     -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX
+; COMM: FastISel has trouble with the fp80 type
+; RUN: llc < %s -mtriple=i686-unknown-unknown                   -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,FAST-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2    -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,FAST-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx     -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX
+; COMM: GlobalISel can't legalize double stores on 32-bit platforms because it can't distinguish floating-point from integer values during legalization
+; RUN: llc < %s -mtriple=i686-unknown-unknown                   -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes X86,GLOBAL-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2    -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes SSE,GLOBAL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx     -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX
+
+define double @fpext_float_to_double(float %f) {
+; X86-LABEL: fpext_float_to_double:
+; X86:       # %bb.0:
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    retl
+;
+; SSE-LABEL: fpext_float_to_double:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtss2sd %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fpext_float_to_double:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fpext float %f to double
+  ret double %1
+}
+
+define x86_fp80 @fpext_float_to_x86_fp80(float %f) {
+; FASTSDAG-X86-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-X86:       # %bb.0:
+; FASTSDAG-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT:    retl
+;
+; FASTSDAG-SSE-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-SSE:       # %bb.0:
+; FASTSDAG-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    flds -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    retq
+;
+; FASTSDAG-AVX-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-AVX:       # %bb.0:
+; FASTSDAG-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    flds -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    retq
+;
+; GLOBAL-X86-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-X86:       # %bb.0:
+; GLOBAL-X86-NEXT:    pushl %eax
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; GLOBAL-X86-NEXT:    movl %eax, (%esp)
+; GLOBAL-X86-NEXT:    flds (%esp)
+; GLOBAL-X86-NEXT:    popl %eax
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 4
+; GLOBAL-X86-NEXT:    retl
+;
+; GLOBAL-SSE-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-SSE:       # %bb.0:
+; GLOBAL-SSE-NEXT:    movd %xmm0, %eax
+; GLOBAL-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    flds -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    retq
+;
+; GLOBAL-AVX-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-AVX:       # %bb.0:
+; GLOBAL-AVX-NEXT:    vmovd %xmm0, %eax
+; GLOBAL-AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    flds -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    retq
+  %1 = fpext float %f to x86_fp80
+  ret x86_fp80 %1
+}
+
+define x86_fp80 @fpext_double_to_x86_fp80(double %d) {
+; FASTSDAG-X86-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-X86:       # %bb.0:
+; FASTSDAG-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT:    retl
+;
+; FASTSDAG-SSE-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-SSE:       # %bb.0:
+; FASTSDAG-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    retq
+;
+; FASTSDAG-AVX-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-AVX:       # %bb.0:
+; FASTSDAG-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    retq
+;
+; GLOBAL-X86-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-X86:       # %bb.0:
+; GLOBAL-X86-NEXT:    pushl %ebp
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT:    .cfi_offset %ebp, -8
+; GLOBAL-X86-NEXT:    movl %esp, %ebp
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_register %ebp
+; GLOBAL-X86-NEXT:    andl $-8, %esp
+; GLOBAL-X86-NEXT:    subl $8, %esp
+; GLOBAL-X86-NEXT:    leal 8(%ebp), %eax
+; GLOBAL-X86-NEXT:    movl 8(%ebp), %ecx
+; GLOBAL-X86-NEXT:    movl 4(%eax), %eax
+; GLOBAL-X86-NEXT:    movl %esp, %edx
+; GLOBAL-X86-NEXT:    movl %ecx, (%esp)
+; GLOBAL-X86-NEXT:    movl %eax, 4(%edx)
+; GLOBAL-X86-NEXT:    fldl (%esp)
+; GLOBAL-X86-NEXT:    movl %ebp, %esp
+; GLOBAL-X86-NEXT:    popl %ebp
+; GLOBAL-X86-NEXT:    .cfi_def_cfa %esp, 4
+; GLOBAL-X86-NEXT:    retl
+;
+; GLOBAL-SSE-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-SSE:       # %bb.0:
+; GLOBAL-SSE-NEXT:    movq %xmm0, %rax
+; GLOBAL-SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    retq
+;
+; GLOBAL-AVX-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-AVX:       # %bb.0:
+; GLOBAL-AVX-NEXT:    vmovq %xmm0, %rax
+; GLOBAL-AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    retq
+  %1 = fpext double %d to x86_fp80
+  ret x86_fp80 %1
+}
+
+define float @fptrunc_double_to_float(double %d) {
+; FASTSDAG-X86-LABEL: fptrunc_double_to_float:
+; FASTSDAG-X86:       # %bb.0:
+; FASTSDAG-X86-NEXT:    pushl %eax
+; FASTSDAG-X86-NEXT:    .cfi_def_cfa_offset 8
+; FASTSDAG-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT:    fstps (%esp)
+; FASTSDAG-X86-NEXT:    flds (%esp)
+; FASTSDAG-X86-NEXT:    popl %eax
+; FASTSDAG-X86-NEXT:    .cfi_def_cfa_offset 4
+; FASTSDAG-X86-NEXT:    retl
+;
+; SSE-LABEL: fptrunc_double_to_float:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtsd2ss %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fptrunc_double_to_float:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; GLOBAL-X86-LABEL: fptrunc_double_to_float:
+; GLOBAL-X86:       # %bb.0:
+; GLOBAL-X86-NEXT:    pushl %eax
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; GLOBAL-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; GLOBAL-X86-NEXT:    movl 4(%eax), %eax
+; GLOBAL-X86-NEXT:    movl %esp, %edx
+; GLOBAL-X86-NEXT:    movl %ecx, (%esp)
+; GLOBAL-X86-NEXT:    movl %eax, 4(%edx)
+; GLOBAL-X86-NEXT:    flds (%esp)
+; GLOBAL-X86-NEXT:    popl %eax
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 4
+; GLOBAL-X86-NEXT:    retl
+  %1 = fptrunc double %d to float
+  ret float %1
+}
+
+define float @fptrunc_x86_fp80_to_float(x86_fp80 %x) {
+; X86-LABEL: fptrunc_x86_fp80_to_float:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    fldt {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_float:
+; FASTSDAG-SSE:       # %bb.0:
+; FASTSDAG-SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FASTSDAG-SSE-NEXT:    retq
+;
+; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_float:
+; FASTSDAG-AVX:       # %bb.0:
+; FASTSDAG-AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FASTSDAG-AVX-NEXT:    retq
+;
+; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_float:
+; GLOBAL-SSE:       # %bb.0:
+; GLOBAL-SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; GLOBAL-SSE-NEXT:    movd %eax, %xmm0
+; GLOBAL-SSE-NEXT:    retq
+;
+; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_float:
+; GLOBAL-AVX:       # %bb.0:
+; GLOBAL-AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; GLOBAL-AVX-NEXT:    vmovd %eax, %xmm0
+; GLOBAL-AVX-NEXT:    retq
+  %1 = fptrunc x86_fp80 %x to float
+  ret float %1
+}
+
+define double @fptrunc_x86_fp80_to_double(x86_fp80 %x) {
+; X86-LABEL: fptrunc_x86_fp80_to_double:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    fldt 8(%ebp)
+; X86-NEXT:    fstpl (%esp)
+; X86-NEXT:    fldl (%esp)
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
+; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_double:
+; FASTSDAG-SSE:       # %bb.0:
+; FASTSDAG-SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; FASTSDAG-SSE-NEXT:    retq
+;
+; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_double:
+; FASTSDAG-AVX:       # %bb.0:
+; FASTSDAG-AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; FASTSDAG-AVX-NEXT:    retq
+;
+; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_double:
+; GLOBAL-SSE:       # %bb.0:
+; GLOBAL-SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; GLOBAL-SSE-NEXT:    movq %rax, %xmm0
+; GLOBAL-SSE-NEXT:    retq
+;
+; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_double:
+; GLOBAL-AVX:       # %bb.0:
+; GLOBAL-AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; GLOBAL-AVX-NEXT:    vmovq %rax, %xmm0
+; GLOBAL-AVX-NEXT:    retq
+  %1 = fptrunc x86_fp80 %x to double
+  ret double %1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FAST-AVX: {{.*}}
+; FAST-SSE: {{.*}}
+; FAST-X86: {{.*}}
+; SDAG-AVX: {{.*}}
+; SDAG-SSE: {{.*}}
+; SDAG-X86: {{.*}}

@llvmbot
Member

llvmbot commented May 27, 2025

@llvm/pr-subscribers-backend-x86

Author: Evgenii Kudriashov (e-kud)

Changes

We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.



Contributor

@arsenm arsenm left a comment


We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.

But you shouldn't need this? The FP-ness should be encoded in the load/store directly, i.e. are we missing FP extending load and store?


  auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                          StackTy, StackTyAlign);
  MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
Contributor


Can this be a generic lower action that creates an FP extending load / FP truncating store?

Contributor Author


Yes, it looks quite generic. Let me move it to generic lower actions.

Contributor Author

@e-kud e-kud May 28, 2025


@arsenm we have a conflicting FP64->FP16 lowering

LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);
  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);
  return UnableToLegalize;
}

It doesn't hurt X86, but is it OK for the generic lowering to do the FP64->FP16 conversion through scalars while going through memory for the other cases, as in the sketch below?
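
One possible shape, just as a sketch (it keeps the existing scalar F64->F16 path and assumes the memory round-trip becomes a helper named something like lowerFPExtAndTruncMem):

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);

  // Existing special case: F64 -> F16 is expanded through scalar arithmetic.
  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);

  // Hypothetical fallback for everything else: store the source and reload it
  // with the destination's memory type (truncating FP store + plain load).
  return lowerFPExtAndTruncMem(MI);
}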

Contributor Author


@arsenm ping.

Contributor


The generic lowering just needs to do something; it doesn't really matter what. Ideally the legalizer helper would have easily accessible helpers for each expansion choice.

Contributor Author


Got it. I've put the implementation into the Helper.

Ideally the legalizer helper would have easily accessible helpers for each expansion choice

It would probably be great to have some API like .lowerWithFor(Pred, HelperFunction, {{types}}). The problem is that the helper functions are member functions, and we probably want to avoid passing member references, so some auxiliary enum would be required for this scenario, as sketched below. Then again, maybe customFor plus a call into a helper is not bad enough to justify inventing an API for more precise lowering.
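
Purely as an illustration of that enum idea (nothing below exists in LLVM today; LowerVia and lowerWithFor are made-up names):

// Hypothetical sketch: an enum keys the expansion choice so the rule table never
// stores member-function references; LegalizerHelper picks the member helper itself.
enum class LowerVia { ScalarF64ToF16, MemRoundTrip };

// Imagined rule, mirroring the existing legalFor/customFor style:
//   getActionDefinitionsBuilder(G_FPTRUNC)
//       .lowerWithFor(UseX87, LowerVia::MemRoundTrip,
//                     {{s32, s64}, {s32, s80}, {s64, s80}});
//
// ...and the dispatch inside LegalizerHelper when applying the rule:
//   switch (Strategy) {
//   case LowerVia::ScalarF64ToF16: return lowerFPTRUNC_F64_TO_F16(MI);
//   case LowerVia::MemRoundTrip:   return lowerFPExtAndTruncMem(MI);
//   }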

@e-kud
Contributor Author

e-kud commented May 27, 2025

We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.

But you shouldn't need this? The FP-ness should be encoded in the load/store directly. i.e. are we missing FP ext load and store?

Yes, we don't have them:


IIUC it should be resolved once FP types are added: https://discourse.llvm.org/t/rfc-globalisel-adding-fp-type-information-to-llt/83349. After that, LLT types will show whether a store or load is a float or integer operation, and the MMO will show whether it is extending or truncating.

@e-kud e-kud requested a review from arsenm June 10, 2025 08:32
@e-kud
Contributor Author

e-kud commented Jun 17, 2025

@RKSimon @arsenm ping

@RKSimon
Collaborator

RKSimon commented Jul 14, 2025

@e-kud please can you merge against trunk latest

@e-kud
Contributor Author

e-kud commented Jul 15, 2025

@e-kud please can you merge against trunk latest

Done. It was just a conflict with G_GET_ROUNDING, since both patches add to the end.

@e-kud
Contributor Author

e-kud commented Jul 29, 2025

@RKSimon @arsenm ping

@e-kud
Contributor Author

e-kud commented Sep 1, 2025

@RKSimon @arsenm ping

Collaborator

@RKSimon RKSimon left a comment


Please update to fix the CI failure.

@e-kud
Contributor Author

e-kud commented Oct 9, 2025

@RKSimon @arsenm ping

    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
    widenScalarDst(MI, WideTy, 1);
Contributor


Suggested change
-  // fp conversions using truncating and extending loads and stores.
+  // Floating-point conversions using truncating and extending loads and stores.

LegalizerHelper::lowerFPExtAndTruncMem(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
          MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
         "Only G_FPEXT and G_FPTRUNC are expected");
Contributor


This can't just use a regular load and store. We should have a proper FP extending load opcode (like G_ZEXTLOAD), and we also need the truncating case.

    return legalizeFPExtAndTrunc(MI, MRI, Helper);
  case TargetOpcode::G_GET_ROUNDING:
    return legalizeGETROUNDING(MI, MRI, Helper);
  case TargetOpcode::G_SET_ROUNDING:
Contributor


Can you rebase to drop this unrelated stuff that was already submitted?
