-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86][AVX512] Use comx for compare #113098
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_ | |
| //------------------------------------------------- | ||
| // AVX10 COMEF instructions | ||
| //------------------------------------------------- | ||
| multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT, | ||
| SDPatternOperator OpNode, string OpcodeStr, | ||
| X86MemOperand x86memop, PatFrag ld_frag, | ||
| Domain d, X86FoldableSchedWrite sched = WriteFComX>{ | ||
| let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in { | ||
| def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), | ||
| !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), | ||
| [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>, | ||
| EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; | ||
| let mayLoad = 1 in { | ||
| def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), | ||
| !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), | ||
| [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>, | ||
| EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode, | ||
| string OpcodeStr, | ||
| Domain d, | ||
|
|
@@ -1564,6 +1582,11 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode, | |
| } | ||
|
|
||
| let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { | ||
|
|
||
| defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, | ||
| "vucomxss", f32mem, loadf32, SSEPackedSingle>, | ||
| TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; | ||
|
Comment on lines
+1592
to
+1594
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. SD and SH? |
||
|
|
||
| defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, | ||
| "vcomxsd", SSEPackedDouble>, | ||
| TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64 | ||
| ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86 | ||
|
|
||
| define i1 @oeq(float %x, float %y) { | ||
| ; X64-LABEL: oeq: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomxss %xmm1, %xmm0 | ||
| ; X64-NEXT: sete %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: oeq: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: sete %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp oeq float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @une(float %x, float %y) { | ||
| ; X64-LABEL: une: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomxss %xmm1, %xmm0 | ||
| ; X64-NEXT: setne %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: une: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: setne %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp une float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @ogt(float %x, float %y) { | ||
|
||
| ; X64-LABEL: ogt: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vucomiss %xmm1, %xmm0 | ||
| ; X64-NEXT: seta %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: ogt: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0 | ||
| ; X86-NEXT: seta %al | ||
| ; X86-NEXT: retl | ||
| %1 = fcmp ogt float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @oeq_mem(ptr %xp, ptr %yp) { | ||
| ; X64-LABEL: oeq_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X64-NEXT: vucomxss (%rsi), %xmm0 | ||
| ; X64-NEXT: sete %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: oeq_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomxss (%eax), %xmm0 | ||
| ; X86-NEXT: sete %al | ||
| ; X86-NEXT: retl | ||
| %x = load float, ptr %xp | ||
| %y = load float, ptr %yp | ||
| %1 = fcmp oeq float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
| define i1 @une_mem(ptr %xp, ptr %yp) { | ||
| ; X64-LABEL: une_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X64-NEXT: vucomxss (%rsi), %xmm0 | ||
| ; X64-NEXT: setne %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: une_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomxss (%eax), %xmm0 | ||
| ; X86-NEXT: setne %al | ||
| ; X86-NEXT: retl | ||
| %x = load float, ptr %xp | ||
| %y = load float, ptr %yp | ||
| %1 = fcmp une float %x, %y | ||
| ret i1 %1 | ||
| } | ||
|
|
||
|
|
||
| define i1 @ogt_mem(ptr %xp, ptr %yp) { | ||
|
||
| ; X64-LABEL: ogt_mem: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X64-NEXT: vucomiss (%rsi), %xmm0 | ||
| ; X64-NEXT: seta %al | ||
| ; X64-NEXT: retq | ||
| ; | ||
| ; X86-LABEL: ogt_mem: | ||
| ; X86: # %bb.0: | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax | ||
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx | ||
| ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; X86-NEXT: vucomiss (%eax), %xmm0 | ||
| ; X86-NEXT: seta %al | ||
| ; X86-NEXT: retl | ||
| %x = load float, ptr %xp | ||
| %y = load float, ptr %yp | ||
| %1 = fcmp ogt float %x, %y | ||
| ret i1 %1 | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang-format.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand the logic here. Why do we require 2 CC for comx? I don't see a test case using 2 CC. Can we just return `SDValue()` to break the combine?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the first line, I mark the intent to do this legalization. COMX tests more flags, and we need a single SETCC node to infer.
Earlier attempts used a chain of SETCC nodes, as in the X86ISelLowering.cpp snippet.
Maybe instead of 2 CC, I need to write 2 flags. Is that right?
If we return `SDValue()`, we select `vucomiss`, so to get the desired selection `vucomxss` we need to change this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. Can we generate `UCOMX` there instead of generating 2 CC and then combining?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mahesh-attarde Did you investigate this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I understand correctly, you are asking to remove t19, t22 and t24. Is that correct?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#113567 added here. Can you review?