Skip to content

Commit c8792ad

Browse files
author
mattarde
committed
Update COMEF optimization for AVX10.2
1 parent 8673d0e commit c8792ad

File tree

4 files changed

+154
-0
lines changed

4 files changed

+154
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49520,6 +49520,15 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
4952049520
// FIXME: need symbolic constants for these magic numbers.
4952149521
// See X86ATTInstPrinter.cpp:printSSECC().
4952249522
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
49523+
49524+
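// With AVX10.2, a UCOMX compare (e.g. VUCOMXSS) lets this equality test be
49525+
// lowered to a single SETCC node and its condition code; the earlier
// COMI-based sequence required two SETCCs.
// NOTE(review): only the f32 pattern (VUCOMXSSZ) is added in this change --
// confirm that f64/f16 operands cannot reach this path unselected.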
49526+
if (Subtarget.hasAVX10_2()) {
49527+
return getSETCC(
49528+
((cc0 == X86::COND_E) ? X86::COND_E : X86::COND_NE),
49529+
DAG.getNode(X86ISD::UCOMX, DL, MVT::i32, CMP00, CMP01), DL,
49530+
DAG);
49531+
}
4952349532
if (Subtarget.hasAVX512()) {
4952449533
SDValue FSetCC =
4952549534
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,

llvm/lib/Target/X86/X86InstrAVX10.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_
15411541
//-------------------------------------------------
15421542
// AVX10 COMEF instructions
15431543
//-------------------------------------------------
1544+
multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
1545+
SDPatternOperator OpNode, string OpcodeStr,
1546+
X86MemOperand x86memop, PatFrag ld_frag,
1547+
Domain d, X86FoldableSchedWrite sched = WriteFComX>{
1548+
let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
1549+
def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
1550+
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1551+
[(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
1552+
EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
1553+
let mayLoad = 1 in {
1554+
def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
1555+
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1556+
[(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
1557+
EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1558+
}
1559+
}
1560+
}
1561+
15441562
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
15451563
string OpcodeStr,
15461564
Domain d,
@@ -1564,6 +1582,11 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
15641582
}
15651583

15661584
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
1585+
1586+
defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
1587+
"vucomxss", f32mem, loadf32, SSEPackedSingle>,
1588+
TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1589+
15671590
defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
15681591
"vcomxsd", SSEPackedDouble>,
15691592
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X64
3+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X86
4+
5+
define i1 @oeq(float %x, float %y) {
6+
; AVX10_2_X64-LABEL: oeq:
7+
; AVX10_2_X64: # %bb.0:
8+
; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0
9+
; AVX10_2_X64-NEXT: sete %al
10+
; AVX10_2_X64-NEXT: retq
11+
;
12+
; AVX10_2_X86-LABEL: oeq:
13+
; AVX10_2_X86: # %bb.0:
14+
; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
15+
; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
16+
; AVX10_2_X86-NEXT: sete %al
17+
; AVX10_2_X86-NEXT: retl
18+
%1 = fcmp oeq float %x, %y
19+
ret i1 %1
20+
}
21+
22+
define i1 @une(float %x, float %y) {
23+
; AVX10_2_X64-LABEL: une:
24+
; AVX10_2_X64: # %bb.0:
25+
; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0
26+
; AVX10_2_X64-NEXT: setne %al
27+
; AVX10_2_X64-NEXT: retq
28+
;
29+
; AVX10_2_X86-LABEL: une:
30+
; AVX10_2_X86: # %bb.0:
31+
; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
32+
; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
33+
; AVX10_2_X86-NEXT: setne %al
34+
; AVX10_2_X86-NEXT: retl
35+
%1 = fcmp une float %x, %y
36+
ret i1 %1
37+
}
38+
39+
define i1 @ogt(float %x, float %y) {
40+
; AVX10_2_X64-LABEL: ogt:
41+
; AVX10_2_X64: # %bb.0:
42+
; AVX10_2_X64-NEXT: vucomiss %xmm1, %xmm0
43+
; AVX10_2_X64-NEXT: seta %al
44+
; AVX10_2_X64-NEXT: retq
45+
;
46+
; AVX10_2_X86-LABEL: ogt:
47+
; AVX10_2_X86: # %bb.0:
48+
; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
49+
; AVX10_2_X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0
50+
; AVX10_2_X86-NEXT: seta %al
51+
; AVX10_2_X86-NEXT: retl
52+
%1 = fcmp ogt float %x, %y
53+
ret i1 %1
54+
}
55+
56+
define i1 @oeq_mem(ptr %xp, ptr %yp) {
57+
; AVX10_2_X64-LABEL: oeq_mem:
58+
; AVX10_2_X64: # %bb.0:
59+
; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
60+
; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0
61+
; AVX10_2_X64-NEXT: sete %al
62+
; AVX10_2_X64-NEXT: retq
63+
;
64+
; AVX10_2_X86-LABEL: oeq_mem:
65+
; AVX10_2_X86: # %bb.0:
66+
; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
67+
; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
68+
; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
69+
; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0
70+
; AVX10_2_X86-NEXT: sete %al
71+
; AVX10_2_X86-NEXT: retl
72+
%x = load float, ptr %xp
73+
%y = load float, ptr %yp
74+
%1 = fcmp oeq float %x, %y
75+
ret i1 %1
76+
}
77+
78+
define i1 @une_mem(ptr %xp, ptr %yp) {
79+
; AVX10_2_X64-LABEL: une_mem:
80+
; AVX10_2_X64: # %bb.0:
81+
; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
82+
; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0
83+
; AVX10_2_X64-NEXT: setne %al
84+
; AVX10_2_X64-NEXT: retq
85+
;
86+
; AVX10_2_X86-LABEL: une_mem:
87+
; AVX10_2_X86: # %bb.0:
88+
; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
89+
; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
90+
; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
91+
; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0
92+
; AVX10_2_X86-NEXT: setne %al
93+
; AVX10_2_X86-NEXT: retl
94+
%x = load float, ptr %xp
95+
%y = load float, ptr %yp
96+
%1 = fcmp une float %x, %y
97+
ret i1 %1
98+
}
99+
100+
101+
define i1 @ogt_mem(ptr %xp, ptr %yp) {
102+
; AVX10_2_X64-LABEL: ogt_mem:
103+
; AVX10_2_X64: # %bb.0:
104+
; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
105+
; AVX10_2_X64-NEXT: vucomiss (%rsi), %xmm0
106+
; AVX10_2_X64-NEXT: seta %al
107+
; AVX10_2_X64-NEXT: retq
108+
;
109+
; AVX10_2_X86-LABEL: ogt_mem:
110+
; AVX10_2_X86: # %bb.0:
111+
; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
112+
; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
113+
; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
114+
; AVX10_2_X86-NEXT: vucomiss (%eax), %xmm0
115+
; AVX10_2_X86-NEXT: seta %al
116+
; AVX10_2_X86-NEXT: retl
117+
%x = load float, ptr %xp
118+
%y = load float, ptr %yp
119+
%1 = fcmp ogt float %x, %y
120+
ret i1 %1
121+
}

llvm/test/TableGen/x86-fold-tables.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1961,6 +1961,7 @@ static const X86FoldTableEntry Table1[] = {
19611961
{X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE},
19621962
{X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE},
19631963
{X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE},
1964+
{X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0},
19641965
{X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE},
19651966
{X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0},
19661967
{X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0},

0 commit comments

Comments
 (0)