
Conversation

e-kud
Contributor

@e-kud e-kud commented May 27, 2025

We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.
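
The custom legalization round-trips the value through a stack temporary typed as the narrower of the two types, so an x87 extending load or truncating store performs the actual conversion. A condensed sketch of that body (simplified from legalizeFPExtAndTrunc in the diff below; the assert and the enclosing function are omitted):

  // Condensed from legalizeFPExtAndTrunc below. The stack slot uses the narrower
  // type, so G_FPEXT becomes a plain store plus an extending load and G_FPTRUNC
  // becomes a truncating store plus a plain load; the x87 fld/fst do the conversion.
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy;

  MachinePointerInfo PtrInfo;
  Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy);
  auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(),
                                               StackTyAlign, PtrInfo);

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = MIRBuilder.getMF();
  MIRBuilder.buildStore(SrcReg, StackTemp,
                        *MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                                 StackTy, StackTyAlign));
  MIRBuilder.buildLoad(DstReg, StackTemp,
                       *MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                                StackTy, StackTyAlign));
  MI.eraseFromParent();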

@llvmbot
Member

llvmbot commented May 27, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Evgenii Kudriashov (e-kud)

Changes

We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.


Full diff: https://github.com/llvm/llvm-project/pull/141611.diff

4 Files Affected:

  • (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp (+48-13)
  • (modified) llvm/lib/Target/X86/GISel/X86LegalizerInfo.h (+3)
  • (removed) llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll (-12)
  • (added) llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll (+279)
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index f008cb1bea839..58215d4e00202 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -376,9 +376,15 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
     Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
                                      {s16, p0, s16, 1},
                                      {s32, p0, s32, 1},
-                                     {s80, p0, s80, 1},
                                      {p0, p0, p0, 1},
                                      {v4s8, p0, v4s8, 1}});
+
+    if (UseX87)
+      Action.legalForTypesWithMemDesc({{s80, p0, s32, 1},
+                                       {s80, p0, s64, 1},
+                                       {s32, p0, s80, 1},
+                                       {s64, p0, s80, 1},
+                                       {s80, p0, s80, 1}});
     if (Is64Bit)
       Action.legalForTypesWithMemDesc(
           {{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}});
@@ -476,18 +482,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
       .widenScalarToNextPow2(1);
 
   // fp conversions
-  getActionDefinitionsBuilder(G_FPEXT).legalIf([=](const LegalityQuery &Query) {
-    return (HasSSE2 && typePairInSet(0, 1, {{s64, s32}})(Query)) ||
-           (HasAVX && typePairInSet(0, 1, {{v4s64, v4s32}})(Query)) ||
-           (HasAVX512 && typePairInSet(0, 1, {{v8s64, v8s32}})(Query));
-  });
-
-  getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
-      [=](const LegalityQuery &Query) {
-        return (HasSSE2 && typePairInSet(0, 1, {{s32, s64}})(Query)) ||
-               (HasAVX && typePairInSet(0, 1, {{v4s32, v4s64}})(Query)) ||
-               (HasAVX512 && typePairInSet(0, 1, {{v8s32, v8s64}})(Query));
-      });
+  getActionDefinitionsBuilder(G_FPEXT)
+      .legalFor(HasSSE2, {{s64, s32}})
+      .legalFor(HasAVX, {{v4s64, v4s32}})
+      .legalFor(HasAVX512, {{v8s64, v8s32}})
+      .customFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}});
+
+  getActionDefinitionsBuilder(G_FPTRUNC)
+      .legalFor(HasSSE2, {{s32, s64}})
+      .legalFor(HasAVX, {{v4s32, v4s64}})
+      .legalFor(HasAVX512, {{v8s32, v8s64}})
+      .customFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}});
 
   getActionDefinitionsBuilder(G_SITOFP)
       .legalIf([=](const LegalityQuery &Query) {
@@ -671,6 +676,9 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
     return legalizeUITOFP(MI, MRI, Helper);
   case TargetOpcode::G_STORE:
     return legalizeNarrowingStore(MI, MRI, Helper);
+  case TargetOpcode::G_FPEXT:
+  case TargetOpcode::G_FPTRUNC:
+    return legalizeFPExtAndTrunc(MI, MRI, Helper);
   }
   llvm_unreachable("expected switch to return");
 }
@@ -781,6 +789,33 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
   return true;
 }
 
+bool X86LegalizerInfo::legalizeFPExtAndTrunc(MachineInstr &MI,
+                                             MachineRegisterInfo &MRI,
+                                             LegalizerHelper &Helper) const {
+  assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
+          MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
+         "Only G_FPEXT and G_FPTRUNC are expected");
+  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+  MachinePointerInfo PtrInfo;
+  LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy;
+  Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy);
+  auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(),
+                                               StackTyAlign, PtrInfo);
+
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineFunction &MF = MIRBuilder.getMF();
+  auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                                           StackTy, StackTyAlign);
+  MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO);
+
+  auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+                                          StackTy, StackTyAlign);
+  MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
   return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 54f776456397b..b224f3f46a2d5 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -48,6 +48,9 @@ class X86LegalizerInfo : public LegalizerInfo {
 
   bool legalizeNarrowingStore(MachineInstr &MI, MachineRegisterInfo &MRI,
                               LegalizerHelper &Helper) const;
+
+  bool legalizeFPExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                             LegalizerHelper &Helper) const;
 };
 } // namespace llvm
 #endif
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll
deleted file mode 100644
index 8501009e2915a..0000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK
-
-define double @test(float %a) {
-; CHECK-LABEL: test:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT:    retq
-entry:
-  %conv = fpext float %a to double
-  ret double %conv
-}
diff --git a/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll
new file mode 100644
index 0000000000000..0ad9b90806ce9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown                   -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,SDAG-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2    -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,SDAG-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx     -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX
+; COMM: FastISel has trouble with the fp80 type
+; RUN: llc < %s -mtriple=i686-unknown-unknown                   -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,FAST-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2    -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,FAST-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx     -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX
+; COMM: GlobalISel can't legalize double stores on 32-bit platforms because it can't distinguish floating-point from integer values during legalization
+; RUN: llc < %s -mtriple=i686-unknown-unknown                   -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes X86,GLOBAL-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2    -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes SSE,GLOBAL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx     -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX
+
+define double @fpext_float_to_double(float %f) {
+; X86-LABEL: fpext_float_to_double:
+; X86:       # %bb.0:
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    retl
+;
+; SSE-LABEL: fpext_float_to_double:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtss2sd %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fpext_float_to_double:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fpext float %f to double
+  ret double %1
+}
+
+define x86_fp80 @fpext_float_to_x86_fp80(float %f) {
+; FASTSDAG-X86-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-X86:       # %bb.0:
+; FASTSDAG-X86-NEXT:    flds {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT:    retl
+;
+; FASTSDAG-SSE-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-SSE:       # %bb.0:
+; FASTSDAG-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    flds -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    retq
+;
+; FASTSDAG-AVX-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-AVX:       # %bb.0:
+; FASTSDAG-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    flds -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    retq
+;
+; GLOBAL-X86-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-X86:       # %bb.0:
+; GLOBAL-X86-NEXT:    pushl %eax
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; GLOBAL-X86-NEXT:    movl %eax, (%esp)
+; GLOBAL-X86-NEXT:    flds (%esp)
+; GLOBAL-X86-NEXT:    popl %eax
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 4
+; GLOBAL-X86-NEXT:    retl
+;
+; GLOBAL-SSE-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-SSE:       # %bb.0:
+; GLOBAL-SSE-NEXT:    movd %xmm0, %eax
+; GLOBAL-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    flds -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    retq
+;
+; GLOBAL-AVX-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-AVX:       # %bb.0:
+; GLOBAL-AVX-NEXT:    vmovd %xmm0, %eax
+; GLOBAL-AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    flds -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    retq
+  %1 = fpext float %f to x86_fp80
+  ret x86_fp80 %1
+}
+
+define x86_fp80 @fpext_double_to_x86_fp80(double %d) {
+; FASTSDAG-X86-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-X86:       # %bb.0:
+; FASTSDAG-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT:    retl
+;
+; FASTSDAG-SSE-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-SSE:       # %bb.0:
+; FASTSDAG-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    retq
+;
+; FASTSDAG-AVX-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-AVX:       # %bb.0:
+; FASTSDAG-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    retq
+;
+; GLOBAL-X86-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-X86:       # %bb.0:
+; GLOBAL-X86-NEXT:    pushl %ebp
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT:    .cfi_offset %ebp, -8
+; GLOBAL-X86-NEXT:    movl %esp, %ebp
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_register %ebp
+; GLOBAL-X86-NEXT:    andl $-8, %esp
+; GLOBAL-X86-NEXT:    subl $8, %esp
+; GLOBAL-X86-NEXT:    leal 8(%ebp), %eax
+; GLOBAL-X86-NEXT:    movl 8(%ebp), %ecx
+; GLOBAL-X86-NEXT:    movl 4(%eax), %eax
+; GLOBAL-X86-NEXT:    movl %esp, %edx
+; GLOBAL-X86-NEXT:    movl %ecx, (%esp)
+; GLOBAL-X86-NEXT:    movl %eax, 4(%edx)
+; GLOBAL-X86-NEXT:    fldl (%esp)
+; GLOBAL-X86-NEXT:    movl %ebp, %esp
+; GLOBAL-X86-NEXT:    popl %ebp
+; GLOBAL-X86-NEXT:    .cfi_def_cfa %esp, 4
+; GLOBAL-X86-NEXT:    retl
+;
+; GLOBAL-SSE-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-SSE:       # %bb.0:
+; GLOBAL-SSE-NEXT:    movq %xmm0, %rax
+; GLOBAL-SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    retq
+;
+; GLOBAL-AVX-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-AVX:       # %bb.0:
+; GLOBAL-AVX-NEXT:    vmovq %xmm0, %rax
+; GLOBAL-AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    retq
+  %1 = fpext double %d to x86_fp80
+  ret x86_fp80 %1
+}
+
+define float @fptrunc_double_to_float(double %d) {
+; FASTSDAG-X86-LABEL: fptrunc_double_to_float:
+; FASTSDAG-X86:       # %bb.0:
+; FASTSDAG-X86-NEXT:    pushl %eax
+; FASTSDAG-X86-NEXT:    .cfi_def_cfa_offset 8
+; FASTSDAG-X86-NEXT:    fldl {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT:    fstps (%esp)
+; FASTSDAG-X86-NEXT:    flds (%esp)
+; FASTSDAG-X86-NEXT:    popl %eax
+; FASTSDAG-X86-NEXT:    .cfi_def_cfa_offset 4
+; FASTSDAG-X86-NEXT:    retl
+;
+; SSE-LABEL: fptrunc_double_to_float:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtsd2ss %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: fptrunc_double_to_float:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; GLOBAL-X86-LABEL: fptrunc_double_to_float:
+; GLOBAL-X86:       # %bb.0:
+; GLOBAL-X86-NEXT:    pushl %eax
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; GLOBAL-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; GLOBAL-X86-NEXT:    movl 4(%eax), %eax
+; GLOBAL-X86-NEXT:    movl %esp, %edx
+; GLOBAL-X86-NEXT:    movl %ecx, (%esp)
+; GLOBAL-X86-NEXT:    movl %eax, 4(%edx)
+; GLOBAL-X86-NEXT:    flds (%esp)
+; GLOBAL-X86-NEXT:    popl %eax
+; GLOBAL-X86-NEXT:    .cfi_def_cfa_offset 4
+; GLOBAL-X86-NEXT:    retl
+  %1 = fptrunc double %d to float
+  ret float %1
+}
+
+define float @fptrunc_x86_fp80_to_float(x86_fp80 %x) {
+; X86-LABEL: fptrunc_x86_fp80_to_float:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    fldt {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_float:
+; FASTSDAG-SSE:       # %bb.0:
+; FASTSDAG-SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FASTSDAG-SSE-NEXT:    retq
+;
+; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_float:
+; FASTSDAG-AVX:       # %bb.0:
+; FASTSDAG-AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FASTSDAG-AVX-NEXT:    retq
+;
+; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_float:
+; GLOBAL-SSE:       # %bb.0:
+; GLOBAL-SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; GLOBAL-SSE-NEXT:    movd %eax, %xmm0
+; GLOBAL-SSE-NEXT:    retq
+;
+; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_float:
+; GLOBAL-AVX:       # %bb.0:
+; GLOBAL-AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; GLOBAL-AVX-NEXT:    vmovd %eax, %xmm0
+; GLOBAL-AVX-NEXT:    retq
+  %1 = fptrunc x86_fp80 %x to float
+  ret float %1
+}
+
+define double @fptrunc_x86_fp80_to_double(x86_fp80 %x) {
+; X86-LABEL: fptrunc_x86_fp80_to_double:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    fldt 8(%ebp)
+; X86-NEXT:    fstpl (%esp)
+; X86-NEXT:    fldl (%esp)
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
+; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_double:
+; FASTSDAG-SSE:       # %bb.0:
+; FASTSDAG-SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; FASTSDAG-SSE-NEXT:    retq
+;
+; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_double:
+; FASTSDAG-AVX:       # %bb.0:
+; FASTSDAG-AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; FASTSDAG-AVX-NEXT:    retq
+;
+; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_double:
+; GLOBAL-SSE:       # %bb.0:
+; GLOBAL-SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; GLOBAL-SSE-NEXT:    movq %rax, %xmm0
+; GLOBAL-SSE-NEXT:    retq
+;
+; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_double:
+; GLOBAL-AVX:       # %bb.0:
+; GLOBAL-AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
+; GLOBAL-AVX-NEXT:    vmovq %rax, %xmm0
+; GLOBAL-AVX-NEXT:    retq
+  %1 = fptrunc x86_fp80 %x to double
+  ret double %1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FAST-AVX: {{.*}}
+; FAST-SSE: {{.*}}
+; FAST-X86: {{.*}}
+; SDAG-AVX: {{.*}}
+; SDAG-SSE: {{.*}}
+; SDAG-X86: {{.*}}

@llvmbot
Member

llvmbot commented May 27, 2025

@llvm/pr-subscribers-backend-x86

Author: Evgenii Kudriashov (e-kud)

Changes

We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.



Contributor

@arsenm arsenm left a comment


We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.

But you shouldn't need this? The FP-ness should be encoded in the load/store directly, i.e. are we missing FP extending load and store?


  auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                          StackTy, StackTyAlign);
  MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
Contributor


Can this be a generic lower action that creates an FP extending load / FP truncating store?

Contributor Author


Yes, it looks quite generic. Let me move it to generic lower actions.

Contributor Author

@e-kud e-kud May 28, 2025


@arsenm we have a conflicting FP64->FP16 lowering

LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);
  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);
  return UnableToLegalize;
}

It doesn't hurt X86, but is it OK for the generic lowering to do the FP64->FP16 conversion through scalars while going through memory for the other cases, as in the sketch below?
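
One possible shape, just as a sketch (it keeps the existing scalar F64->F16 path and assumes the memory round-trip becomes a helper named something like lowerFPExtAndTruncMem):

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);

  // Existing special case: F64 -> F16 is expanded through scalar arithmetic.
  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);

  // Hypothetical fallback for everything else: store the source and reload it
  // with the destination's memory type (truncating FP store + plain load).
  return lowerFPExtAndTruncMem(MI);
}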

Contributor Author


@arsenm ping.

Contributor


The generic lowering just needs to do something; it doesn't really matter what. Ideally the legalizer helper would have easily accessible helpers for each expansion choice.

Contributor Author


Got it. I've put the implementation into the Helper.

Ideally the legalizer helper would have easily accessible helpers for each expansion choice

It would probably be great to have some API like .lowerWithFor(Pred, HelperFunction, {{types}}). The problem is that the helper functions are member functions, and we probably want to avoid passing member references, so some auxiliary enum would be required for this scenario, as sketched below. Then again, maybe customFor plus a call into a helper is not bad enough to justify inventing an API for more precise lowering.
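
Purely as an illustration of that enum idea (nothing below exists in LLVM today; LowerVia and lowerWithFor are made-up names):

// Hypothetical sketch: an enum keys the expansion choice so the rule table never
// stores member-function references; LegalizerHelper picks the member helper itself.
enum class LowerVia { ScalarF64ToF16, MemRoundTrip };

// Imagined rule, mirroring the existing legalFor/customFor style:
//   getActionDefinitionsBuilder(G_FPTRUNC)
//       .lowerWithFor(UseX87, LowerVia::MemRoundTrip,
//                     {{s32, s64}, {s32, s80}, {s64, s80}});
//
// ...and the dispatch inside LegalizerHelper when applying the rule:
//   switch (Strategy) {
//   case LowerVia::ScalarF64ToF16: return lowerFPTRUNC_F64_TO_F16(MI);
//   case LowerVia::MemRoundTrip:   return lowerFPExtAndTruncMem(MI);
//   }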

@e-kud
Contributor Author

e-kud commented May 27, 2025

We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.

But you shouldn't need this? The FP-ness should be encoded in the load/store directly. i.e. are we missing FP ext load and store?

Yes, we don't have them:


IIUC it should be resolved once FP types are added: https://discourse.llvm.org/t/rfc-globalisel-adding-fp-type-information-to-llt/83349. After that, LLT types will show whether a store or load is a float or integer operation, and the MMO will show whether it is extending or truncating.

@e-kud e-kud requested a review from arsenm June 10, 2025 08:32
@e-kud
Contributor Author

e-kud commented Jun 17, 2025

@RKSimon @arsenm ping

@RKSimon
Collaborator

RKSimon commented Jul 14, 2025

@e-kud please can you merge against trunk latest

@e-kud
Contributor Author

e-kud commented Jul 15, 2025

@e-kud please can you merge against trunk latest

Done. It was just a conflict with G_GET_ROUNDING, since both patches add to the end.

@e-kud
Contributor Author

e-kud commented Jul 29, 2025

@RKSimon @arsenm ping

@e-kud
Contributor Author

e-kud commented Sep 1, 2025

@RKSimon @arsenm ping

Collaborator

@RKSimon RKSimon left a comment


Please update to fix the CI failure.

@e-kud
Contributor Author

e-kud commented Oct 9, 2025

@RKSimon @arsenm ping

    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
    widenScalarDst(MI, WideTy, 1);
Contributor


Suggested change
-  // fp conversions using truncating and extending loads and stores.
+  // Floating-point conversions using truncating and extending loads and stores.

LegalizerHelper::lowerFPExtAndTruncMem(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
          MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
         "Only G_FPEXT and G_FPTRUNC are expected");
Contributor


This can't just use a regular load and store. We should have a proper FP extending load opcode (like G_ZEXTLOAD), and we also need the truncating case.

    return legalizeFPExtAndTrunc(MI, MRI, Helper);
  case TargetOpcode::G_GET_ROUNDING:
    return legalizeGETROUNDING(MI, MRI, Helper);
  case TargetOpcode::G_SET_ROUNDING:
Contributor


Can you rebase to drop this unrelated stuff that was already submitted?
