-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Addressing Review comments #113695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
[X86] Addressing Review comments #113695
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-backend-x86
Author: Mahesh-Attarde (mahesh-attarde)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/113695.diff
4 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bcb84add65d83e..d691346020a02a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -223,6 +223,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
+ if (Subtarget.hasAVX10_2()) {
+ for (auto VT : {MVT::f32, MVT::f64}) {
+ setCondCodeAction(ISD::SETOEQ, VT, Custom);
+ setCondCodeAction(ISD::SETUNE, VT, Custom);
+ }
+ }
// Integer absolute.
if (Subtarget.canUseCMOV()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
@@ -2292,8 +2298,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f16,
+ Subtarget.hasAVX10_2() ? Custom : Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f16,
+ Subtarget.hasAVX10_2() ? Custom : Expand);
if (Subtarget.useAVX512Regs()) {
setGroup(MVT::v32f16);
@@ -24073,6 +24081,13 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
+ if (Subtarget.hasAVX10_2()) {
+ if (CC == ISD::SETOEQ || CC == ISD::SETUNE) {
+ auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE);
+ return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1),
+ dl, DAG);
+ }
+ }
// Handle floating point.
X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG);
if (CondCode == X86::COND_INVALID)
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..1a1255532b773f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_
//-------------------------------------------------
// AVX10 COMEF instructions
//-------------------------------------------------
+multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
+ SDPatternOperator OpNode, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Domain d, X86FoldableSchedWrite sched = WriteFComX>{
+ let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
+ def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in {
+ def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
+ EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ }
+ }
+}
+
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
string OpcodeStr,
Domain d,
@@ -1564,6 +1582,15 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
}
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
+ defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
+ "vucomxsd", f64mem, loadf64, SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
+ "vucomxsh", f16mem, loadf16, SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+ defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
+ "vucomxss", f32mem, loadf32, SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
"vcomxsd", SSEPackedDouble>,
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
new file mode 100644
index 00000000000000..de0bec7ea2695a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
@@ -0,0 +1,237 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86
+
+define i1 @hoeq(half %x, half %y) {
+; X64-LABEL: hoeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsh %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hoeq:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %1 = fcmp oeq half %x, %y
+ ret i1 %1
+}
+
+define i1 @hune(half %x, half %y) {
+; X64-LABEL: hune:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsh %xmm1, %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hune:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %1 = fcmp une half %x, %y
+ ret i1 %1
+}
+
+define i1 @hoeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: hoeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vucomxsh (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hoeq_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh (%eax), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %x = load half, ptr %xp
+ %y = load half, ptr %yp
+ %1 = fcmp oeq half %x, %y
+ ret i1 %1
+}
+
+define i1 @hune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: hune_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vucomxsh (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: hune_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT: vucomxsh (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %x = load half, ptr %xp
+ %y = load half, ptr %yp
+ %1 = fcmp une half %x, %y
+ ret i1 %1
+}
+
+define i1 @foeq(float %x, float %y) {
+; X64-LABEL: foeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: foeq:
+; X86: # %bb.0:
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %1 = fcmp oeq float %x, %y
+ ret i1 %1
+}
+
+define i1 @fune(float %x, float %y) {
+; X64-LABEL: fune:
+; X64: # %bb.0:
+; X64-NEXT: vucomxss %xmm1, %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: fune:
+; X86: # %bb.0:
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %1 = fcmp une float %x, %y
+ ret i1 %1
+}
+
+define i1 @foeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: foeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vucomxss (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: foeq_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss (%eax), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %x = load float, ptr %xp
+ %y = load float, ptr %yp
+ %1 = fcmp oeq float %x, %y
+ ret i1 %1
+}
+
+define i1 @fune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: fune_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vucomxss (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: fune_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vucomxss (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %x = load float, ptr %xp
+ %y = load float, ptr %yp
+ %1 = fcmp une float %x, %y
+ ret i1 %1
+}
+
+define i1 @doeq(double %x, double %y) {
+; X64-LABEL: doeq:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsd %xmm1, %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: doeq:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %1 = fcmp oeq double %x, %y
+ ret i1 %1
+}
+
+define i1 @dune(double %x, double %y) {
+; X64-LABEL: dune:
+; X64: # %bb.0:
+; X64-NEXT: vucomxsd %xmm1, %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: dune:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %1 = fcmp une double %x, %y
+ ret i1 %1
+}
+
+define i1 @doeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: doeq_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vucomxsd (%rsi), %xmm0
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-LABEL: doeq_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd (%eax), %xmm0
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+ %x = load double, ptr %xp
+ %y = load double, ptr %yp
+ %1 = fcmp oeq double %x, %y
+ ret i1 %1
+}
+
+define i1 @dune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: dune_mem:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vucomxsd (%rsi), %xmm0
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-LABEL: dune_mem:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vucomxsd (%eax), %xmm0
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+ %x = load double, ptr %xp
+ %y = load double, ptr %yp
+ %1 = fcmp une double %x, %y
+ ret i1 %1
+}
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 85d9b02ac0cbf1..fd6ee37d27e147 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1959,8 +1959,11 @@ static const X86FoldTableEntry Table1[] = {
{X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE},
{X86::VUCOMISSrr, X86::VUCOMISSrm, 0},
{X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSDZrr, X86::VUCOMXSDZrm, 0},
{X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSHZrr, X86::VUCOMXSHZrm, 0},
{X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0},
{X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE},
{X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0},
{X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0},
|
Contributor
Author
|
Reviewing the failures in vec-strict-cmp-128-f16.ll and fminimum-fmaximum.ll |
Collaborator
|
Please can you rename the PR title + summary so it describes the patch? It currently makes very little sense. |
Contributor
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
#113567