X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX #123917

wzssyqa · 2025-01-22T09:52:42Z

Currently, FCANONICALIZE is not enabled for f64 with SSE2,
and is not enabled for f80 for 32bit system.
Let's enable them.

llvmbot · 2025-01-22T09:53:32Z

@llvm/pr-subscribers-backend-x86

Author: YunQiang Su (wzssyqa)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/123917.diff

2 Files Affected:

(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+1-1)
(added) llvm/test/CodeGen/X86/canonicalize-vars-f64-i686.ll (+143)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a956074e50d86f..fd1c36d2ba66bc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -334,10 +334,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
     }
     setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
+    setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
     if (Subtarget.is64Bit()) {
       setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
       setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
-      setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
     }
   }
   if (Subtarget.hasAVX10_2()) {
diff --git a/llvm/test/CodeGen/X86/canonicalize-vars-f64-i686.ll b/llvm/test/CodeGen/X86/canonicalize-vars-f64-i686.ll
new file mode 100644
index 00000000000000..6b66042bdb3146
--- /dev/null
+++ b/llvm/test/CodeGen/X86/canonicalize-vars-f64-i686.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
+; RUN: llc -mattr=+sse2 -mtriple=i686-- < %s | FileCheck %s -check-prefixes=SSE2
+; RUN: llc -mattr=+avx -mtriple=i686-- < %s | FileCheck %s -check-prefixes=AVX
+
+define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
+; SSE2-LABEL: canonicalize_fp64:
+; SSE2:       # %bb.0: # %start
+; SSE2-NEXT:    pushl %ebp
+; SSE2-NEXT:    .cfi_def_cfa_offset 8
+; SSE2-NEXT:    .cfi_offset %ebp, -8
+; SSE2-NEXT:    movl %esp, %ebp
+; SSE2-NEXT:    .cfi_def_cfa_register %ebp
+; SSE2-NEXT:    andl $-8, %esp
+; SSE2-NEXT:    subl $8, %esp
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT:    movapd %xmm0, %xmm2
+; SSE2-NEXT:    cmpunordsd %xmm0, %xmm2
+; SSE2-NEXT:    movapd %xmm2, %xmm3
+; SSE2-NEXT:    andpd %xmm1, %xmm3
+; SSE2-NEXT:    maxsd %xmm0, %xmm1
+; SSE2-NEXT:    andnpd %xmm1, %xmm2
+; SSE2-NEXT:    orpd %xmm3, %xmm2
+; SSE2-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; SSE2-NEXT:    movsd %xmm2, (%esp)
+; SSE2-NEXT:    fldl (%esp)
+; SSE2-NEXT:    movl %ebp, %esp
+; SSE2-NEXT:    popl %ebp
+; SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; SSE2-NEXT:    retl
+;
+; AVX-LABEL: canonicalize_fp64:
+; AVX:       # %bb.0: # %start
+; AVX-NEXT:    pushl %ebp
+; AVX-NEXT:    .cfi_def_cfa_offset 8
+; AVX-NEXT:    .cfi_offset %ebp, -8
+; AVX-NEXT:    movl %esp, %ebp
+; AVX-NEXT:    .cfi_def_cfa_register %ebp
+; AVX-NEXT:    andl $-8, %esp
+; AVX-NEXT:    subl $8, %esp
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm2
+; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX-NEXT:    vmovsd %xmm0, (%esp)
+; AVX-NEXT:    fldl (%esp)
+; AVX-NEXT:    movl %ebp, %esp
+; AVX-NEXT:    popl %ebp
+; AVX-NEXT:    .cfi_def_cfa %esp, 4
+; AVX-NEXT:    retl
+start:
+  %c = fcmp olt double %a, %b
+  %d = fcmp uno double %a, 0.000000e+00
+  %or.cond.i.i = or i1 %d, %c
+  %e = select i1 %or.cond.i.i, double %b, double %a
+  %f = tail call double @llvm.canonicalize.f64(double %e) #2
+  ret double %f
+}
+
+define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
+; SSE2-LABEL: v_test_canonicalize_var_f64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; SSE2-NEXT:    movsd %xmm0, (%eax)
+; SSE2-NEXT:    retl
+;
+; AVX-LABEL: v_test_canonicalize_var_f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX-NEXT:    vmovsd %xmm0, (%eax)
+; AVX-NEXT:    retl
+  %val = load double, double addrspace(1)* %out
+  %canonicalized = call double @llvm.canonicalize.f64(double %val)
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_undef(double addrspace(1)* %out) {
+; SSE2-LABEL: canonicalize_undef:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2-NEXT:    movl $2146959360, 4(%eax) # imm = 0x7FF80000
+; SSE2-NEXT:    movl $0, (%eax)
+; SSE2-NEXT:    retl
+;
+; AVX-LABEL: canonicalize_undef:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX-NEXT:    movl $2146959360, 4(%eax) # imm = 0x7FF80000
+; AVX-NEXT:    movl $0, (%eax)
+; AVX-NEXT:    retl
+  %canonicalized = call double @llvm.canonicalize.f64(double undef)
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define <4 x double> @canon_fp64_varargsv4f64(<4 x double> %a) {
+; SSE2-LABEL: canon_fp64_varargsv4f64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movapd {{.*#+}} xmm2 = [1.0E+0,1.0E+0]
+; SSE2-NEXT:    mulpd %xmm2, %xmm0
+; SSE2-NEXT:    mulpd %xmm2, %xmm1
+; SSE2-NEXT:    retl
+;
+; AVX-LABEL: canon_fp64_varargsv4f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; AVX-NEXT:    retl
+  %canonicalized = call <4 x double> @llvm.canonicalize.v4f32(<4 x double> %a)
+  ret <4 x double> %canonicalized
+}
+
+define void @vec_canonicalize_var_v4f64(<4 x double> addrspace(1)* %out) #1 {
+; SSE2-LABEL: vec_canonicalize_var_v4f64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0]
+; SSE2-NEXT:    movapd 16(%eax), %xmm1
+; SSE2-NEXT:    mulpd %xmm0, %xmm1
+; SSE2-NEXT:    mulpd (%eax), %xmm0
+; SSE2-NEXT:    movapd %xmm0, (%eax)
+; SSE2-NEXT:    movapd %xmm1, 16(%eax)
+; SSE2-NEXT:    retl
+;
+; AVX-LABEL: vec_canonicalize_var_v4f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX-NEXT:    vmovapd (%eax), %ymm0
+; AVX-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; AVX-NEXT:    vmovapd %ymm0, (%eax)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retl
+  %val = load <4 x double>, <4 x double> addrspace(1)* %out
+  %canonicalized = call <4 x double> @llvm.canonicalize.v4f32(<4 x double> %val)
+  store <4 x double> %canonicalized, <4 x double> addrspace(1)* %out
+  ret void
+}

github-actions · 2025-01-22T09:56:02Z

✅ With the latest revision this PR passed the undef deprecator.

llvm/test/CodeGen/X86/canonicalize-vars-f64-i686.ll

github-actions · 2025-01-24T05:11:22Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff a9c61e0d7655a11f45f8e94b9481193fba11302e 78a366cf40d42b4c1680e2a0df7456c5c1e9044c --extensions cpp -- llvm/lib/Target/X86/X86ISelLowering.cpp

View the diff from clang-format here.

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 832dc4d3c2..653a63804b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -888,7 +888,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STRICT_FMUL     , MVT::f80, Legal);
     setOperationAction(ISD::STRICT_FDIV     , MVT::f80, Legal);
     setOperationAction(ISD::STRICT_FSQRT    , MVT::f80, Legal);
-    setOperationAction(ISD::FCANONICALIZE   , MVT::f80, Custom);
+    setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
     if (isTypeLegal(MVT::f16)) {
       setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);

llvm/test/CodeGen/X86/canonicalize-vars.ll

phoebewang

LGTM.

llvmbot added the backend:X86 label Jan 22, 2025

wzssyqa requested a review from arsenm January 22, 2025 09:52

arsenm reviewed Jan 22, 2025

View reviewed changes

RKSimon reviewed Jan 22, 2025

View reviewed changes

llvm/test/CodeGen/X86/canonicalize-vars-f64-i686.ll Outdated Show resolved Hide resolved

wzssyqa force-pushed the i386-sse-fcanonicalize branch from 45cf88f to 358924c Compare January 24, 2025 05:07

wzssyqa changed the title ~~X86: Support FCANONICALIZE on f64 for i686 with SSE2 or AVX~~ X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX Jan 24, 2025

wzssyqa requested review from RKSimon and arsenm January 24, 2025 05:11

phoebewang reviewed Jan 24, 2025

View reviewed changes

llvm/test/CodeGen/X86/canonicalize-vars.ll Outdated Show resolved Hide resolved

wzssyqa force-pushed the i386-sse-fcanonicalize branch from aa8bc2d to 92e6bea Compare January 24, 2025 10:02

X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX

78a366c

wzssyqa force-pushed the i386-sse-fcanonicalize branch from 92e6bea to 78a366c Compare January 24, 2025 10:04

phoebewang approved these changes Jan 24, 2025

View reviewed changes

wzssyqa merged commit bfa7de0 into llvm:main Jan 26, 2025
7 of 8 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX #123917

X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX #123917

Uh oh!

wzssyqa commented Jan 22, 2025 •

edited by phoebewang

Loading

Uh oh!

llvmbot commented Jan 22, 2025

Uh oh!

github-actions bot commented Jan 22, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Jan 24, 2025 •

edited

Loading

Uh oh!

Uh oh!

phoebewang left a comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX #123917

X86: Support FCANONICALIZE on f64/f80 for i686 with SSE2 or AVX #123917

Uh oh!

Conversation

wzssyqa commented Jan 22, 2025 • edited by phoebewang Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jan 22, 2025

Uh oh!

github-actions bot commented Jan 22, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Jan 24, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

phoebewang left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

wzssyqa commented Jan 22, 2025 •

edited by phoebewang

Loading

github-actions bot commented Jan 22, 2025 •

edited

Loading

github-actions bot commented Jan 24, 2025 •

edited

Loading