From a603218cf79bd14304479f56f3349c7075e9e696 Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Tue, 27 May 2025 06:51:53 -0700 Subject: [PATCH 1/3] Precommit tests --- .../CodeGen/X86/GlobalISel/fpext-scalar.ll | 12 -- llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll | 177 ++++++++++++++++++ 2 files changed, 177 insertions(+), 12 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll create mode 100644 llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll diff --git a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll deleted file mode 100644 index 8501009e2915a..0000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll +++ /dev/null @@ -1,12 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK - -define double @test(float %a) { -; CHECK-LABEL: test: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 -; CHECK-NEXT: retq -entry: - %conv = fpext float %a to double - ret double %conv -} diff --git a/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll new file mode 100644 index 0000000000000..717802110c6a6 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll @@ -0,0 +1,177 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,SDAG-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,SDAG-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=0 | FileCheck %s 
--check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX +; COMM: FastISel has troubles with fp80 type +; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,FAST-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,FAST-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX +; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes X86,GLOBAL-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes SSE,GLOBAL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX + +define double @fpext_float_to_double(float %f) { +; X86-LABEL: fpext_float_to_double: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: retl +; +; SSE-LABEL: fpext_float_to_double: +; SSE: # %bb.0: +; SSE-NEXT: cvtss2sd %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: fpext_float_to_double: +; AVX: # %bb.0: +; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = fpext float %f to double + ret double %1 +} + +define x86_fp80 @fpext_float_to_x86_fp80(float %f) { +; X86-LABEL: fpext_float_to_x86_fp80: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: retl +; +; SSE-LABEL: 
fpext_float_to_x86_fp80: +; SSE: # %bb.0: +; SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: flds -{{[0-9]+}}(%rsp) +; SSE-NEXT: retq +; +; AVX-LABEL: fpext_float_to_x86_fp80: +; AVX: # %bb.0: +; AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: flds -{{[0-9]+}}(%rsp) +; AVX-NEXT: retq + %1 = fpext float %f to x86_fp80 + ret x86_fp80 %1 +} + +define x86_fp80 @fpext_double_to_x86_fp80(double %d) { +; X86-LABEL: fpext_double_to_x86_fp80: +; X86: # %bb.0: +; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: retl +; +; SSE-LABEL: fpext_double_to_x86_fp80: +; SSE: # %bb.0: +; SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: fldl -{{[0-9]+}}(%rsp) +; SSE-NEXT: retq +; +; AVX-LABEL: fpext_double_to_x86_fp80: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) +; AVX-NEXT: fldl -{{[0-9]+}}(%rsp) +; AVX-NEXT: retq + %1 = fpext double %d to x86_fp80 + ret x86_fp80 %1 +} + +define float @fptrunc_double_to_float(double %d) { +; X86-LABEL: fptrunc_double_to_float: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: fstps (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; SSE-LABEL: fptrunc_double_to_float: +; SSE: # %bb.0: +; SSE-NEXT: cvtsd2ss %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: fptrunc_double_to_float: +; AVX: # %bb.0: +; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = fptrunc double %d to float + ret float %1 +} + +define float @fptrunc_x86_fp80_to_float(x86_fp80 %x) { +; X86-LABEL: fptrunc_x86_fp80_to_float: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstps (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; SSE-LABEL: fptrunc_x86_fp80_to_float: +; SSE: # %bb.0: +; SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-NEXT: fstps -{{[0-9]+}}(%rsp) +; SSE-NEXT: movss 
{{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: retq +; +; AVX-LABEL: fptrunc_x86_fp80_to_float: +; AVX: # %bb.0: +; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: fstps -{{[0-9]+}}(%rsp) +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: retq + %1 = fptrunc x86_fp80 %x to float + ret float %1 +} + +define double @fptrunc_x86_fp80_to_double(x86_fp80 %x) { +; X86-LABEL: fptrunc_x86_fp80_to_double: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: fldt 8(%ebp) +; X86-NEXT: fstpl (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; +; SSE-LABEL: fptrunc_x86_fp80_to_double: +; SSE: # %bb.0: +; SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-NEXT: fstpl -{{[0-9]+}}(%rsp) +; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: retq +; +; AVX-LABEL: fptrunc_x86_fp80_to_double: +; AVX: # %bb.0: +; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: fstpl -{{[0-9]+}}(%rsp) +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: retq + %1 = fptrunc x86_fp80 %x to double + ret double %1 +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; FAST-AVX: {{.*}} +; FAST-SSE: {{.*}} +; FAST-X86: {{.*}} +; FASTSDAG-AVX: {{.*}} +; FASTSDAG-SSE: {{.*}} +; FASTSDAG-X86: {{.*}} +; GLOBAL-AVX: {{.*}} +; GLOBAL-SSE: {{.*}} +; GLOBAL-X86: {{.*}} +; SDAG-AVX: {{.*}} +; SDAG-SSE: {{.*}} +; SDAG-X86: {{.*}} From cd09cc331958ac543785a7de80ae68a4bd5d7887 Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Tue, 27 May 2025 06:57:16 -0700 Subject: [PATCH 2/3] [X86][GlobalISel] Support fp80 for G_FPTRUNC and G_FPEXT --- .../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 61 ++++- llvm/lib/Target/X86/GISel/X86LegalizerInfo.h | 3 + llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll | 244 +++++++++++++----- 3 files changed, 224 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index f008cb1bea839..58215d4e00202 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -376,9 +376,15 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, Action.legalForTypesWithMemDesc({{s8, p0, s8, 1}, {s16, p0, s16, 1}, {s32, p0, s32, 1}, - {s80, p0, s80, 1}, {p0, p0, p0, 1}, {v4s8, p0, v4s8, 1}}); + + if (UseX87) + Action.legalForTypesWithMemDesc({{s80, p0, s32, 1}, + {s80, p0, s64, 1}, + {s32, p0, s80, 1}, + {s64, p0, s80, 1}, + {s80, p0, s80, 1}}); if (Is64Bit) Action.legalForTypesWithMemDesc( {{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}}); @@ -476,18 +482,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .widenScalarToNextPow2(1); // fp conversions - getActionDefinitionsBuilder(G_FPEXT).legalIf([=](const LegalityQuery &Query) { - return (HasSSE2 && typePairInSet(0, 1, {{s64, s32}})(Query)) || - (HasAVX && typePairInSet(0, 1, {{v4s64, v4s32}})(Query)) || - (HasAVX512 && typePairInSet(0, 1, {{v8s64, v8s32}})(Query)); - }); - - getActionDefinitionsBuilder(G_FPTRUNC).legalIf( - [=](const LegalityQuery &Query) { - return (HasSSE2 && 
typePairInSet(0, 1, {{s32, s64}})(Query)) || - (HasAVX && typePairInSet(0, 1, {{v4s32, v4s64}})(Query)) || - (HasAVX512 && typePairInSet(0, 1, {{v8s32, v8s64}})(Query)); - }); + getActionDefinitionsBuilder(G_FPEXT) + .legalFor(HasSSE2, {{s64, s32}}) + .legalFor(HasAVX, {{v4s64, v4s32}}) + .legalFor(HasAVX512, {{v8s64, v8s32}}) + .customFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}}); + + getActionDefinitionsBuilder(G_FPTRUNC) + .legalFor(HasSSE2, {{s32, s64}}) + .legalFor(HasAVX, {{v4s32, v4s64}}) + .legalFor(HasAVX512, {{v8s32, v8s64}}) + .customFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}}); getActionDefinitionsBuilder(G_SITOFP) .legalIf([=](const LegalityQuery &Query) { @@ -671,6 +676,9 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, return legalizeUITOFP(MI, MRI, Helper); case TargetOpcode::G_STORE: return legalizeNarrowingStore(MI, MRI, Helper); + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTRUNC: + return legalizeFPExtAndTrunc(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); } @@ -781,6 +789,33 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI, return true; } +bool X86LegalizerInfo::legalizeFPExtAndTrunc(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + assert((MI.getOpcode() == TargetOpcode::G_FPEXT || + MI.getOpcode() == TargetOpcode::G_FPTRUNC) && + "Only G_FPEXT and G_FPTRUNC are expected"); + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + MachinePointerInfo PtrInfo; + LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? 
SrcTy : DstTy; + Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy); + auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(), + StackTyAlign, PtrInfo); + + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineFunction &MF = MIRBuilder.getMF(); + auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + StackTy, StackTyAlign); + MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO); + + auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + StackTy, StackTyAlign); + MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO); + + MI.eraseFromParent(); + return true; +} + bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { return true; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 54f776456397b..b224f3f46a2d5 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -48,6 +48,9 @@ class X86LegalizerInfo : public LegalizerInfo { bool legalizeNarrowingStore(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + + bool legalizeFPExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; }; } // namespace llvm #endif diff --git a/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll index 717802110c6a6..0ad9b90806ce9 100644 --- a/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll +++ b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll @@ -8,10 +8,11 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,FAST-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=1 -global-isel=0 
-fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX +; COMM: GlobalISel can't legalize double stores on 32-bit platforms because legalization cannot distinguish double from integer types ; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes X86,GLOBAL-X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes SSE,GLOBAL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes SSE,GLOBAL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX define double @fpext_float_to_double(float %f) { ; X86-LABEL: fpext_float_to_double: @@ -33,58 +34,118 @@ define double @fpext_float_to_double(float %f) { } define x86_fp80 @fpext_float_to_x86_fp80(float %f) { -; X86-LABEL: fpext_float_to_x86_fp80: -; X86: # %bb.0: -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: retl +; FASTSDAG-X86-LABEL: fpext_float_to_x86_fp80: +; FASTSDAG-X86: # %bb.0: +; FASTSDAG-X86-NEXT: flds {{[0-9]+}}(%esp) +; FASTSDAG-X86-NEXT: retl ; -; SSE-LABEL: fpext_float_to_x86_fp80: -; SSE: # %bb.0: -; SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: flds -{{[0-9]+}}(%rsp) -; SSE-NEXT: retq +; FASTSDAG-SSE-LABEL: fpext_float_to_x86_fp80: +; FASTSDAG-SSE: # %bb.0: +; 
FASTSDAG-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; FASTSDAG-SSE-NEXT: flds -{{[0-9]+}}(%rsp) +; FASTSDAG-SSE-NEXT: retq ; -; AVX-LABEL: fpext_float_to_x86_fp80: -; AVX: # %bb.0: -; AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) -; AVX-NEXT: flds -{{[0-9]+}}(%rsp) -; AVX-NEXT: retq +; FASTSDAG-AVX-LABEL: fpext_float_to_x86_fp80: +; FASTSDAG-AVX: # %bb.0: +; FASTSDAG-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; FASTSDAG-AVX-NEXT: flds -{{[0-9]+}}(%rsp) +; FASTSDAG-AVX-NEXT: retq +; +; GLOBAL-X86-LABEL: fpext_float_to_x86_fp80: +; GLOBAL-X86: # %bb.0: +; GLOBAL-X86-NEXT: pushl %eax +; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8 +; GLOBAL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GLOBAL-X86-NEXT: movl %eax, (%esp) +; GLOBAL-X86-NEXT: flds (%esp) +; GLOBAL-X86-NEXT: popl %eax +; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 4 +; GLOBAL-X86-NEXT: retl +; +; GLOBAL-SSE-LABEL: fpext_float_to_x86_fp80: +; GLOBAL-SSE: # %bb.0: +; GLOBAL-SSE-NEXT: movd %xmm0, %eax +; GLOBAL-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; GLOBAL-SSE-NEXT: flds -{{[0-9]+}}(%rsp) +; GLOBAL-SSE-NEXT: retq +; +; GLOBAL-AVX-LABEL: fpext_float_to_x86_fp80: +; GLOBAL-AVX: # %bb.0: +; GLOBAL-AVX-NEXT: vmovd %xmm0, %eax +; GLOBAL-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; GLOBAL-AVX-NEXT: flds -{{[0-9]+}}(%rsp) +; GLOBAL-AVX-NEXT: retq %1 = fpext float %f to x86_fp80 ret x86_fp80 %1 } define x86_fp80 @fpext_double_to_x86_fp80(double %d) { -; X86-LABEL: fpext_double_to_x86_fp80: -; X86: # %bb.0: -; X86-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NEXT: retl +; FASTSDAG-X86-LABEL: fpext_double_to_x86_fp80: +; FASTSDAG-X86: # %bb.0: +; FASTSDAG-X86-NEXT: fldl {{[0-9]+}}(%esp) +; FASTSDAG-X86-NEXT: retl ; -; SSE-LABEL: fpext_double_to_x86_fp80: -; SSE: # %bb.0: -; SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: fldl -{{[0-9]+}}(%rsp) -; SSE-NEXT: retq +; FASTSDAG-SSE-LABEL: fpext_double_to_x86_fp80: +; FASTSDAG-SSE: # %bb.0: +; FASTSDAG-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; FASTSDAG-SSE-NEXT: fldl -{{[0-9]+}}(%rsp) +; 
FASTSDAG-SSE-NEXT: retq ; -; AVX-LABEL: fpext_double_to_x86_fp80: -; AVX: # %bb.0: -; AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) -; AVX-NEXT: fldl -{{[0-9]+}}(%rsp) -; AVX-NEXT: retq +; FASTSDAG-AVX-LABEL: fpext_double_to_x86_fp80: +; FASTSDAG-AVX: # %bb.0: +; FASTSDAG-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) +; FASTSDAG-AVX-NEXT: fldl -{{[0-9]+}}(%rsp) +; FASTSDAG-AVX-NEXT: retq +; +; GLOBAL-X86-LABEL: fpext_double_to_x86_fp80: +; GLOBAL-X86: # %bb.0: +; GLOBAL-X86-NEXT: pushl %ebp +; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8 +; GLOBAL-X86-NEXT: .cfi_offset %ebp, -8 +; GLOBAL-X86-NEXT: movl %esp, %ebp +; GLOBAL-X86-NEXT: .cfi_def_cfa_register %ebp +; GLOBAL-X86-NEXT: andl $-8, %esp +; GLOBAL-X86-NEXT: subl $8, %esp +; GLOBAL-X86-NEXT: leal 8(%ebp), %eax +; GLOBAL-X86-NEXT: movl 8(%ebp), %ecx +; GLOBAL-X86-NEXT: movl 4(%eax), %eax +; GLOBAL-X86-NEXT: movl %esp, %edx +; GLOBAL-X86-NEXT: movl %ecx, (%esp) +; GLOBAL-X86-NEXT: movl %eax, 4(%edx) +; GLOBAL-X86-NEXT: fldl (%esp) +; GLOBAL-X86-NEXT: movl %ebp, %esp +; GLOBAL-X86-NEXT: popl %ebp +; GLOBAL-X86-NEXT: .cfi_def_cfa %esp, 4 +; GLOBAL-X86-NEXT: retl +; +; GLOBAL-SSE-LABEL: fpext_double_to_x86_fp80: +; GLOBAL-SSE: # %bb.0: +; GLOBAL-SSE-NEXT: movq %xmm0, %rax +; GLOBAL-SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; GLOBAL-SSE-NEXT: fldl -{{[0-9]+}}(%rsp) +; GLOBAL-SSE-NEXT: retq +; +; GLOBAL-AVX-LABEL: fpext_double_to_x86_fp80: +; GLOBAL-AVX: # %bb.0: +; GLOBAL-AVX-NEXT: vmovq %xmm0, %rax +; GLOBAL-AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; GLOBAL-AVX-NEXT: fldl -{{[0-9]+}}(%rsp) +; GLOBAL-AVX-NEXT: retq %1 = fpext double %d to x86_fp80 ret x86_fp80 %1 } define float @fptrunc_double_to_float(double %d) { -; X86-LABEL: fptrunc_double_to_float: -; X86: # %bb.0: -; X86-NEXT: pushl %eax -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NEXT: fstps (%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: popl %eax -; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl +; FASTSDAG-X86-LABEL: 
fptrunc_double_to_float: +; FASTSDAG-X86: # %bb.0: +; FASTSDAG-X86-NEXT: pushl %eax +; FASTSDAG-X86-NEXT: .cfi_def_cfa_offset 8 +; FASTSDAG-X86-NEXT: fldl {{[0-9]+}}(%esp) +; FASTSDAG-X86-NEXT: fstps (%esp) +; FASTSDAG-X86-NEXT: flds (%esp) +; FASTSDAG-X86-NEXT: popl %eax +; FASTSDAG-X86-NEXT: .cfi_def_cfa_offset 4 +; FASTSDAG-X86-NEXT: retl ; ; SSE-LABEL: fptrunc_double_to_float: ; SSE: # %bb.0: @@ -95,6 +156,21 @@ define float @fptrunc_double_to_float(double %d) { ; AVX: # %bb.0: ; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; GLOBAL-X86-LABEL: fptrunc_double_to_float: +; GLOBAL-X86: # %bb.0: +; GLOBAL-X86-NEXT: pushl %eax +; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8 +; GLOBAL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; GLOBAL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GLOBAL-X86-NEXT: movl 4(%eax), %eax +; GLOBAL-X86-NEXT: movl %esp, %edx +; GLOBAL-X86-NEXT: movl %ecx, (%esp) +; GLOBAL-X86-NEXT: movl %eax, 4(%edx) +; GLOBAL-X86-NEXT: flds (%esp) +; GLOBAL-X86-NEXT: popl %eax +; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 4 +; GLOBAL-X86-NEXT: retl %1 = fptrunc double %d to float ret float %1 } @@ -111,19 +187,35 @@ define float @fptrunc_x86_fp80_to_float(x86_fp80 %x) { ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; -; SSE-LABEL: fptrunc_x86_fp80_to_float: -; SSE: # %bb.0: -; SSE-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE-NEXT: fstps -{{[0-9]+}}(%rsp) -; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: retq +; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_float: +; FASTSDAG-SSE: # %bb.0: +; FASTSDAG-SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; FASTSDAG-SSE-NEXT: fstps -{{[0-9]+}}(%rsp) +; FASTSDAG-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; FASTSDAG-SSE-NEXT: retq ; -; AVX-LABEL: fptrunc_x86_fp80_to_float: -; AVX: # %bb.0: -; AVX-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX-NEXT: fstps -{{[0-9]+}}(%rsp) -; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: retq +; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_float: +; FASTSDAG-AVX: # %bb.0: +; 
FASTSDAG-AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; FASTSDAG-AVX-NEXT: fstps -{{[0-9]+}}(%rsp) +; FASTSDAG-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; FASTSDAG-AVX-NEXT: retq +; +; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_float: +; GLOBAL-SSE: # %bb.0: +; GLOBAL-SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; GLOBAL-SSE-NEXT: fstps -{{[0-9]+}}(%rsp) +; GLOBAL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; GLOBAL-SSE-NEXT: movd %eax, %xmm0 +; GLOBAL-SSE-NEXT: retq +; +; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_float: +; GLOBAL-AVX: # %bb.0: +; GLOBAL-AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; GLOBAL-AVX-NEXT: fstps -{{[0-9]+}}(%rsp) +; GLOBAL-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; GLOBAL-AVX-NEXT: vmovd %eax, %xmm0 +; GLOBAL-AVX-NEXT: retq %1 = fptrunc x86_fp80 %x to float ret float %1 } @@ -146,19 +238,35 @@ define double @fptrunc_x86_fp80_to_double(x86_fp80 %x) { ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; -; SSE-LABEL: fptrunc_x86_fp80_to_double: -; SSE: # %bb.0: -; SSE-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE-NEXT: fstpl -{{[0-9]+}}(%rsp) -; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: retq +; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_double: +; FASTSDAG-SSE: # %bb.0: +; FASTSDAG-SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; FASTSDAG-SSE-NEXT: fstpl -{{[0-9]+}}(%rsp) +; FASTSDAG-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; FASTSDAG-SSE-NEXT: retq ; -; AVX-LABEL: fptrunc_x86_fp80_to_double: -; AVX: # %bb.0: -; AVX-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX-NEXT: fstpl -{{[0-9]+}}(%rsp) -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: retq +; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_double: +; FASTSDAG-AVX: # %bb.0: +; FASTSDAG-AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; FASTSDAG-AVX-NEXT: fstpl -{{[0-9]+}}(%rsp) +; FASTSDAG-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; FASTSDAG-AVX-NEXT: retq +; +; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_double: +; GLOBAL-SSE: # %bb.0: +; GLOBAL-SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; GLOBAL-SSE-NEXT: fstpl -{{[0-9]+}}(%rsp) +; GLOBAL-SSE-NEXT: movq 
-{{[0-9]+}}(%rsp), %rax +; GLOBAL-SSE-NEXT: movq %rax, %xmm0 +; GLOBAL-SSE-NEXT: retq +; +; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_double: +; GLOBAL-AVX: # %bb.0: +; GLOBAL-AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; GLOBAL-AVX-NEXT: fstpl -{{[0-9]+}}(%rsp) +; GLOBAL-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; GLOBAL-AVX-NEXT: vmovq %rax, %xmm0 +; GLOBAL-AVX-NEXT: retq %1 = fptrunc x86_fp80 %x to double ret double %1 } @@ -166,12 +274,6 @@ define double @fptrunc_x86_fp80_to_double(x86_fp80 %x) { ; FAST-AVX: {{.*}} ; FAST-SSE: {{.*}} ; FAST-X86: {{.*}} -; FASTSDAG-AVX: {{.*}} -; FASTSDAG-SSE: {{.*}} -; FASTSDAG-X86: {{.*}} -; GLOBAL-AVX: {{.*}} -; GLOBAL-SSE: {{.*}} -; GLOBAL-X86: {{.*}} ; SDAG-AVX: {{.*}} ; SDAG-SSE: {{.*}} ; SDAG-X86: {{.*}} From 80998165bbd4f2d12444ce89c5238d260bac45b3 Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Thu, 25 Sep 2025 18:05:47 -0700 Subject: [PATCH 3/3] Generalize FPTRUNC/FPEXT lowering using mem --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 + .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 6 ++++ .../CodeGen/GlobalISel/LegalizerHelper.cpp | 31 ++++++++++++++++- .../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 34 ++----------------- llvm/lib/Target/X86/GISel/X86LegalizerInfo.h | 3 -- 5 files changed, 39 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 22569aab236af..690f40c51fdd2 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -483,6 +483,7 @@ class LegalizerHelper { LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI); LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI); + LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI); LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI); LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI); LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI); diff --git 
a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index fd72a3898562e..836e87de6f62d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -742,6 +742,12 @@ class LegalizeRuleSet { LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types) { return actionFor(LegalizeAction::Lower, Types); } + LegalizeRuleSet &lowerFor(bool Pred, + std::initializer_list<std::pair<LLT, LLT>> Types) { + if (!Pred) + return *this; + return actionFor(LegalizeAction::Lower, Types); + } /// The instruction is lowered when type indexes 0 and 1 is any type pair in /// the given list. LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types, diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f3e036ed1b947..86ca857f92f24 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4667,6 +4667,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_FPTOUI_SAT: case G_FPTOSI_SAT: return lowerFPTOINT_SAT(MI); + case G_FPEXT: + return lowerFPExtAndTruncMem(MI); case G_FPTRUNC: return lowerFPTRUNC(MI); case G_FPOWI: @@ -8408,6 +8410,33 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) { return Legalized; } +// fp conversions using truncating and extending loads and stores. +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFPExtAndTruncMem(MachineInstr &MI) { + assert((MI.getOpcode() == TargetOpcode::G_FPEXT || + MI.getOpcode() == TargetOpcode::G_FPTRUNC) && + "Only G_FPEXT and G_FPTRUNC are expected"); + + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + MachinePointerInfo PtrInfo; + LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? 
SrcTy : DstTy; + Align StackTyAlign = getStackTemporaryAlignment(StackTy); + auto StackTemp = + createStackTemporary(StackTy.getSizeInBytes(), StackTyAlign, PtrInfo); + + MachineFunction &MF = MIRBuilder.getMF(); + auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + StackTy, StackTyAlign); + MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO); + + auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + StackTy, StackTyAlign); + MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO); + + MI.eraseFromParent(); + return Legalized; +} + // f64 -> f16 conversion using round-to-nearest-even rounding mode. LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { @@ -8533,7 +8562,7 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) { if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64) return lowerFPTRUNC_F64_TO_F16(MI); - return UnableToLegalize; + return lowerFPExtAndTruncMem(MI); } LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) { diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 128ffbf7e49c3..e9bc5834ea69c 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -447,13 +447,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .legalFor(HasSSE2, {{s64, s32}}) .legalFor(HasAVX, {{v4s64, v4s32}}) .legalFor(HasAVX512, {{v8s64, v8s32}}) - .customFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}}); + .lowerFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}}); getActionDefinitionsBuilder(G_FPTRUNC) .legalFor(HasSSE2, {{s32, s64}}) .legalFor(HasAVX, {{v4s32, v4s64}}) .legalFor(HasAVX512, {{v8s32, v8s64}}) - .customFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}}); + .lowerFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}}); getActionDefinitionsBuilder(G_SITOFP) .legalFor(HasSSE1, {{s32, s32}}) @@ -623,9 +623,6 @@ bool 
X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, return legalizeSITOFP(MI, MRI, Helper); case TargetOpcode::G_FPTOSI: return legalizeFPTOSI(MI, MRI, Helper); - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - return legalizeFPExtAndTrunc(MI, MRI, Helper); case TargetOpcode::G_GET_ROUNDING: return legalizeGETROUNDING(MI, MRI, Helper); } @@ -870,33 +867,6 @@ bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI, return true; } -bool X86LegalizerInfo::legalizeFPExtAndTrunc(MachineInstr &MI, - MachineRegisterInfo &MRI, - LegalizerHelper &Helper) const { - assert((MI.getOpcode() == TargetOpcode::G_FPEXT || - MI.getOpcode() == TargetOpcode::G_FPTRUNC) && - "Only G_FPEXT and G_FPTRUNC are expected"); - auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); - MachinePointerInfo PtrInfo; - LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy; - Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy); - auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(), - StackTyAlign, PtrInfo); - - MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; - MachineFunction &MF = MIRBuilder.getMF(); - auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - StackTy, StackTyAlign); - MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO); - - auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - StackTy, StackTyAlign); - MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO); - - MI.eraseFromParent(); - return true; -} - bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { return true; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 7c00531d371e7..0003552d70ee0 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -57,9 +57,6 @@ class X86LegalizerInfo : public LegalizerInfo { bool 
legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; - - bool legalizeFPExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI, - LegalizerHelper &Helper) const; }; } // namespace llvm #endif