-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86][AVX512] Use comx for compare #113567
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
09d1f74
44baff2
d242ba8
f56ad5c
cdd0050
6aa7edf
92c9471
da9493c
1f7b326
55edbe3
9f2241b
857bbe3
a855acd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -218,10 +218,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); | ||
|
|
||
| // SETOEQ and SETUNE require checking two conditions. | ||
| for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) { | ||
| setCondCodeAction(ISD::SETOEQ, VT, Expand); | ||
| setCondCodeAction(ISD::SETUNE, VT, Expand); | ||
| for (auto VT : {MVT::f32, MVT::f64}) { | ||
| setCondCodeAction(ISD::SETOEQ, VT, | ||
| Subtarget.hasAVX10_2() ? Custom : Expand); | ||
| setCondCodeAction(ISD::SETUNE, VT, | ||
| Subtarget.hasAVX10_2() ? Custom : Expand); | ||
| } | ||
| setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand); | ||
| setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); | ||
|
|
||
| // Integer absolute. | ||
| if (Subtarget.canUseCMOV()) { | ||
|
|
@@ -2292,8 +2296,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |
| setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); | ||
| setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); | ||
|
|
||
| setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); | ||
| setCondCodeAction(ISD::SETUNE, MVT::f16, Expand); | ||
| setCondCodeAction(ISD::SETOEQ, MVT::f16, | ||
| Subtarget.hasAVX10_2() ? Custom : Expand); | ||
| setCondCodeAction(ISD::SETUNE, MVT::f16, | ||
| Subtarget.hasAVX10_2() ? Custom : Expand); | ||
|
|
||
| if (Subtarget.useAVX512Regs()) { | ||
| setGroup(MVT::v32f16); | ||
|
|
@@ -2442,7 +2448,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |
| } | ||
| } | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unrelated change. |
||
| if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { | ||
| setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); | ||
| setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); | ||
|
|
@@ -24073,6 +24078,13 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { | |
| return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; | ||
| } | ||
|
|
||
| if (Subtarget.hasAVX10_2_512()) { | ||
|
||
| if (CC == ISD::SETOEQ || CC == ISD::SETUNE) { | ||
| auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE); | ||
| return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1), | ||
| dl, DAG); | ||
| } | ||
| } | ||
| // Handle floating point. | ||
| X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG); | ||
| if (CondCode == X86::COND_INVALID) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_ | |
| //------------------------------------------------- | ||
| // AVX10 COMEF instructions | ||
| //------------------------------------------------- | ||
| multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT, | ||
| SDPatternOperator OpNode, string OpcodeStr, | ||
| X86MemOperand x86memop, PatFrag ld_frag, | ||
| Domain d, X86FoldableSchedWrite sched = WriteFComX>{ | ||
| let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in { | ||
| def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), | ||
| !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), | ||
| [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>, | ||
| EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; | ||
| let mayLoad = 1 in { | ||
| def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), | ||
| !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), | ||
| [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>, | ||
| EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode, | ||
| string OpcodeStr, | ||
| Domain d, | ||
|
|
@@ -1564,6 +1582,16 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode, | |
| } | ||
|
|
||
| let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { | ||
|
|
||
|
||
| defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, | ||
| "vucomxsd", f64mem, loadf64, SSEPackedSingle>, | ||
|
||
| TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; | ||
| defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, | ||
| "vucomxsh", f16mem, loadf16, SSEPackedSingle>, | ||
| T_MAP5, XD, EVEX_CD8<16, CD8VT1>; | ||
| defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, | ||
| "vucomxss", f32mem, loadf32, SSEPackedSingle>, | ||
| TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; | ||
| defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, | ||
| "vcomxsd", SSEPackedDouble>, | ||
| TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,237 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64 | ||
| ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86 | ||
|
||
|
|
||
| define i1 @hoeq(half %x, half %y) { | ||
| ; X64-LABEL: hoeq: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomxsh %xmm1, %xmm0 | ||
| ; X64-NEXT: sete %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: hoeq: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero | ||
| ; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: sete %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp oeq half %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @hune(half %x, half %y) { | ||
| ; X64-LABEL: hune: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomxsh %xmm1, %xmm0 | ||
| ; X64-NEXT: setne %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: hune: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero | ||
| ; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: setne %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp une half %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @hoeq_mem(ptr %xp, ptr %yp) { | ||
| ; X64-LABEL: hoeq_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero | ||
| ; X64-NEXT: vucomxsh (%rsi), %xmm0 | ||
| ; X64-NEXT: sete %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: hoeq_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero | ||
| ; X86-NEXT: vucomxsh (%eax), %xmm0 | ||
| ; X86-NEXT: sete %al | ||
| ; X86-NEXT: retl | ||
| %x = load half, ptr %xp | ||
| %y = load half, ptr %yp | ||
| %1 = fcmp oeq half %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @hune_mem(ptr %xp, ptr %yp) { | ||
| ; X64-LABEL: hune_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero | ||
| ; X64-NEXT: vucomxsh (%rsi), %xmm0 | ||
| ; X64-NEXT: setne %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: hune_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero | ||
| ; X86-NEXT: vucomxsh (%eax), %xmm0 | ||
| ; X86-NEXT: setne %al | ||
| ; X86-NEXT: retl | ||
| %x = load half, ptr %xp | ||
| %y = load half, ptr %yp | ||
| %1 = fcmp une half %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @foeq(float %x, float %y) { | ||
| ; X64-LABEL: foeq: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomxss %xmm1, %xmm0 | ||
| ; X64-NEXT: sete %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: foeq: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: sete %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp oeq float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @fune(float %x, float %y) { | ||
| ; X64-LABEL: fune: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomxss %xmm1, %xmm0 | ||
| ; X64-NEXT: setne %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: fune: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: setne %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp une float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @foeq_mem(ptr %xp, ptr %yp) { | ||
| ; X64-LABEL: foeq_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X64-NEXT: vucomxss (%rsi), %xmm0 | ||
| ; X64-NEXT: sete %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: foeq_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomxss (%eax), %xmm0 | ||
| ; X86-NEXT: sete %al | ||
| ; X86-NEXT: retl | ||
| %x = load float, ptr %xp | ||
| %y = load float, ptr %yp | ||
| %1 = fcmp oeq float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @fune_mem(ptr %xp, ptr %yp) { | ||
| ; X64-LABEL: fune_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X64-NEXT: vucomxss (%rsi), %xmm0 | ||
| ; X64-NEXT: setne %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: fune_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomxss (%eax), %xmm0 | ||
| ; X86-NEXT: setne %al | ||
| ; X86-NEXT: retl | ||
| %x = load float, ptr %xp | ||
| %y = load float, ptr %yp | ||
| %1 = fcmp une float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @doeq(double %x, double %y) { | ||
| ; X64-LABEL: doeq: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomxsd %xmm1, %xmm0 | ||
| ; X64-NEXT: sete %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: doeq: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero | ||
| ; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: sete %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp oeq double %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @dune(double %x, double %y) { | ||
| ; X64-LABEL: dune: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomxsd %xmm1, %xmm0 | ||
| ; X64-NEXT: setne %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: dune: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero | ||
| ; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: setne %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp une double %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @doeq_mem(ptr %xp, ptr %yp) { | ||
| ; X64-LABEL: doeq_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero | ||
| ; X64-NEXT: vucomxsd (%rsi), %xmm0 | ||
| ; X64-NEXT: sete %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: doeq_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero | ||
| ; X86-NEXT: vucomxsd (%eax), %xmm0 | ||
| ; X86-NEXT: sete %al | ||
| ; X86-NEXT: retl | ||
| %x = load double, ptr %xp | ||
| %y = load double, ptr %yp | ||
| %1 = fcmp oeq double %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @dune_mem(ptr %xp, ptr %yp) { | ||
| ; X64-LABEL: dune_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero | ||
| ; X64-NEXT: vucomxsd (%rsi), %xmm0 | ||
| ; X64-NEXT: setne %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: dune_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero | ||
| ; X86-NEXT: vucomxsd (%eax), %xmm0 | ||
| ; X86-NEXT: setne %al | ||
| ; X86-NEXT: retl | ||
| %x = load double, ptr %xp | ||
| %y = load double, ptr %yp | ||
| %1 = fcmp une double %x, %y | ||
| ret i1 %1 | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can leave it as is and override later with