From c8792ad1fdf8f3402627a6b4a5c402715fadeecf Mon Sep 17 00:00:00 2001 From: mattarde Date: Sun, 20 Oct 2024 11:28:34 -0700 Subject: [PATCH 1/4] update comef opt on 10_2 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++ llvm/lib/Target/X86/X86InstrAVX10.td | 23 +++++ llvm/test/CodeGen/X86/avx10_2-cmp.ll | 121 ++++++++++++++++++++++++ llvm/test/TableGen/x86-fold-tables.inc | 1 + 4 files changed, 154 insertions(+) create mode 100644 llvm/test/CodeGen/X86/avx10_2-cmp.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bcb84add65d83..22fcd3bf6bc8e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -49520,6 +49520,15 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; + + // VCOMXSS simplifies conditional code sequence into single setcc node + // and a CC node, Earlier until COMI, it required 2 SETCC's + if (Subtarget.hasAVX10_2()) { + return getSETCC( + ((cc0 == X86::COND_E) ? 
X86::COND_E : X86::COND_NE), + DAG.getNode(X86ISD::UCOMX, DL, MVT::i32, CMP00, CMP01), DL, + DAG); + } if (Subtarget.hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 625f2e01d4721..f968789772838 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_ //------------------------------------------------- // AVX10 COMEF instructions //------------------------------------------------- +multiclass avx10_com_ef Opc, RegisterClass RC, ValueType VT, + SDPatternOperator OpNode, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + Domain d, X86FoldableSchedWrite sched = WriteFComX>{ + let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in { + def rr : AVX512, + EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; + let mayLoad = 1 in { + def rm : AVX512, + EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; + } + } +} + multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, string OpcodeStr, Domain d, @@ -1564,6 +1582,11 @@ multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, } let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { + + defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, + "vucomxss", f32mem, loadf32, SSEPackedSingle>, + TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; + defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, "vcomxsd", SSEPackedDouble>, TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll new file mode 100644 index 0000000000000..8c134e21070b8 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X86 + +define i1 @oeq(float %x, float %y) { +; AVX10_2_X64-LABEL: oeq: +; AVX10_2_X64: # %bb.0: +; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0 +; AVX10_2_X64-NEXT: sete %al +; AVX10_2_X64-NEXT: retq +; +; AVX10_2_X86-LABEL: oeq: +; AVX10_2_X86: # %bb.0: +; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; AVX10_2_X86-NEXT: sete %al +; AVX10_2_X86-NEXT: retl + %1 = fcmp oeq float %x, %y + ret i1 %1 +} + +define i1 @une(float %x, float %y) { +; AVX10_2_X64-LABEL: une: +; AVX10_2_X64: # %bb.0: +; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0 +; AVX10_2_X64-NEXT: setne %al +; AVX10_2_X64-NEXT: retq +; +; AVX10_2_X86-LABEL: une: +; AVX10_2_X86: # %bb.0: +; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; AVX10_2_X86-NEXT: setne %al +; AVX10_2_X86-NEXT: retl + %1 = fcmp une float %x, %y + ret i1 %1 +} + +define i1 @ogt(float %x, float %y) { +; AVX10_2_X64-LABEL: ogt: +; AVX10_2_X64: # %bb.0: +; AVX10_2_X64-NEXT: vucomiss %xmm1, %xmm0 +; AVX10_2_X64-NEXT: seta %al +; AVX10_2_X64-NEXT: retq +; +; AVX10_2_X86-LABEL: ogt: +; AVX10_2_X86: # %bb.0: +; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0 +; AVX10_2_X86-NEXT: seta %al +; AVX10_2_X86-NEXT: retl + %1 = fcmp ogt float %x, %y + ret i1 %1 +} + +define i1 @oeq_mem(ptr %xp, ptr %yp) { +; AVX10_2_X64-LABEL: oeq_mem: +; AVX10_2_X64: # %bb.0: +; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0 +; AVX10_2_X64-NEXT: sete %al +; AVX10_2_X64-NEXT: retq +; +; AVX10_2_X86-LABEL: oeq_mem: +; AVX10_2_X86: # %bb.0: +; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; 
AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0 +; AVX10_2_X86-NEXT: sete %al +; AVX10_2_X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp oeq float %x, %y + ret i1 %1 +} + +define i1 @une_mem(ptr %xp, ptr %yp) { +; AVX10_2_X64-LABEL: une_mem: +; AVX10_2_X64: # %bb.0: +; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0 +; AVX10_2_X64-NEXT: setne %al +; AVX10_2_X64-NEXT: retq +; +; AVX10_2_X86-LABEL: une_mem: +; AVX10_2_X86: # %bb.0: +; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0 +; AVX10_2_X86-NEXT: setne %al +; AVX10_2_X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp une float %x, %y + ret i1 %1 +} + + +define i1 @ogt_mem(ptr %xp, ptr %yp) { +; AVX10_2_X64-LABEL: ogt_mem: +; AVX10_2_X64: # %bb.0: +; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X64-NEXT: vucomiss (%rsi), %xmm0 +; AVX10_2_X64-NEXT: seta %al +; AVX10_2_X64-NEXT: retq +; +; AVX10_2_X86-LABEL: ogt_mem: +; AVX10_2_X86: # %bb.0: +; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX10_2_X86-NEXT: vucomiss (%eax), %xmm0 +; AVX10_2_X86-NEXT: seta %al +; AVX10_2_X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp ogt float %x, %y + ret i1 %1 +} diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 85d9b02ac0cbf..e444c61354abe 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1961,6 +1961,7 @@ static const X86FoldTableEntry Table1[] = { {X86::VUCOMISSrr_Int, 
X86::VUCOMISSrm_Int, TB_NO_REVERSE}, {X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE}, {X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0}, {X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE}, {X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0}, {X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0}, From 28825606c5f538adb46eb354cf6eaacc9427fcbc Mon Sep 17 00:00:00 2001 From: mattarde Date: Sun, 20 Oct 2024 11:40:05 -0700 Subject: [PATCH 2/4] update comment and format --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 22fcd3bf6bc8e..f6c34ccc4b045 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -49521,8 +49521,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; - // VCOMXSS simplifies conditional code sequence into single setcc node - // and a CC node, Earlier until COMI, it required 2 SETCC's + // VCOMXSS simplifies conditional code sequence into single setcc node. + // Earlier until COMI, it required upto 2 SETCC's to test CC. if (Subtarget.hasAVX10_2()) { return getSETCC( ((cc0 == X86::COND_E) ? 
X86::COND_E : X86::COND_NE), From 06ab8e750a583b844a5bdb63f37aec55ebd342a4 Mon Sep 17 00:00:00 2001 From: mattarde Date: Sun, 20 Oct 2024 11:47:58 -0700 Subject: [PATCH 3/4] address review comments --- llvm/test/CodeGen/X86/avx10_2-cmp.ll | 154 +++++++++++++-------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll index 8c134e21070b8..29313fc7cc259 100644 --- a/llvm/test/CodeGen/X86/avx10_2-cmp.ll +++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll @@ -1,74 +1,74 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X64 -; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=AVX10_2_X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86 define i1 @oeq(float %x, float %y) { -; AVX10_2_X64-LABEL: oeq: -; AVX10_2_X64: # %bb.0: -; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0 -; AVX10_2_X64-NEXT: sete %al -; AVX10_2_X64-NEXT: retq +; X64-LABEL: oeq: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq ; -; AVX10_2_X86-LABEL: oeq: -; AVX10_2_X86: # %bb.0: -; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 -; AVX10_2_X86-NEXT: sete %al -; AVX10_2_X86-NEXT: retl +; X86-LABEL: oeq: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl %1 = fcmp oeq float %x, %y ret i1 %1 } define i1 @une(float %x, float %y) { -; AVX10_2_X64-LABEL: une: -; AVX10_2_X64: # %bb.0: -; AVX10_2_X64-NEXT: vucomxss %xmm1, %xmm0 -; AVX10_2_X64-NEXT: setne %al -; 
AVX10_2_X64-NEXT: retq +; X64-LABEL: une: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq ; -; AVX10_2_X86-LABEL: une: -; AVX10_2_X86: # %bb.0: -; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX10_2_X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 -; AVX10_2_X86-NEXT: setne %al -; AVX10_2_X86-NEXT: retl +; X86-LABEL: une: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl %1 = fcmp une float %x, %y ret i1 %1 } define i1 @ogt(float %x, float %y) { -; AVX10_2_X64-LABEL: ogt: -; AVX10_2_X64: # %bb.0: -; AVX10_2_X64-NEXT: vucomiss %xmm1, %xmm0 -; AVX10_2_X64-NEXT: seta %al -; AVX10_2_X64-NEXT: retq +; X64-LABEL: ogt: +; X64: # %bb.0: +; X64-NEXT: vucomiss %xmm1, %xmm0 +; X64-NEXT: seta %al +; X64-NEXT: retq ; -; AVX10_2_X86-LABEL: ogt: -; AVX10_2_X86: # %bb.0: -; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX10_2_X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0 -; AVX10_2_X86-NEXT: seta %al -; AVX10_2_X86-NEXT: retl +; X86-LABEL: ogt: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: seta %al +; X86-NEXT: retl %1 = fcmp ogt float %x, %y ret i1 %1 } define i1 @oeq_mem(ptr %xp, ptr %yp) { -; AVX10_2_X64-LABEL: oeq_mem: -; AVX10_2_X64: # %bb.0: -; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0 -; AVX10_2_X64-NEXT: sete %al -; AVX10_2_X64-NEXT: retq +; X64-LABEL: oeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vucomxss (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq ; -; AVX10_2_X86-LABEL: oeq_mem: -; AVX10_2_X86: # %bb.0: -; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; 
AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0 -; AVX10_2_X86-NEXT: sete %al -; AVX10_2_X86-NEXT: retl +; X86-LABEL: oeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl %x = load float, ptr %xp %y = load float, ptr %yp %1 = fcmp oeq float %x, %y @@ -76,21 +76,21 @@ define i1 @oeq_mem(ptr %xp, ptr %yp) { } define i1 @une_mem(ptr %xp, ptr %yp) { -; AVX10_2_X64-LABEL: une_mem: -; AVX10_2_X64: # %bb.0: -; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX10_2_X64-NEXT: vucomxss (%rsi), %xmm0 -; AVX10_2_X64-NEXT: setne %al -; AVX10_2_X64-NEXT: retq +; X64-LABEL: une_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vucomxss (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq ; -; AVX10_2_X86-LABEL: une_mem: -; AVX10_2_X86: # %bb.0: -; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX10_2_X86-NEXT: vucomxss (%eax), %xmm0 -; AVX10_2_X86-NEXT: setne %al -; AVX10_2_X86-NEXT: retl +; X86-LABEL: une_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl %x = load float, ptr %xp %y = load float, ptr %yp %1 = fcmp une float %x, %y @@ -99,21 +99,21 @@ define i1 @une_mem(ptr %xp, ptr %yp) { define i1 @ogt_mem(ptr %xp, ptr %yp) { -; AVX10_2_X64-LABEL: ogt_mem: -; AVX10_2_X64: # %bb.0: -; AVX10_2_X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX10_2_X64-NEXT: vucomiss (%rsi), %xmm0 -; AVX10_2_X64-NEXT: seta %al -; AVX10_2_X64-NEXT: retq +; X64-LABEL: ogt_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero +; X64-NEXT: vucomiss (%rsi), %xmm0 +; X64-NEXT: seta %al +; X64-NEXT: retq ; -; AVX10_2_X86-LABEL: ogt_mem: -; AVX10_2_X86: # %bb.0: -; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX10_2_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX10_2_X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX10_2_X86-NEXT: vucomiss (%eax), %xmm0 -; AVX10_2_X86-NEXT: seta %al -; AVX10_2_X86-NEXT: retl +; X86-LABEL: ogt_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomiss (%eax), %xmm0 +; X86-NEXT: seta %al +; X86-NEXT: retl %x = load float, ptr %xp %y = load float, ptr %yp %1 = fcmp ogt float %x, %y From 6d5f26cda85c89d61d1a39a15e34cccabb5c65f5 Mon Sep 17 00:00:00 2001 From: mattarde Date: Mon, 21 Oct 2024 02:17:38 -0700 Subject: [PATCH 4/4] update review comments for sh,sd and format --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 +- llvm/lib/Target/X86/X86InstrAVX10.td | 6 + llvm/test/CodeGen/X86/avx10_2-cmp.ll | 194 +++++++++++++++++++----- llvm/test/TableGen/x86-fold-tables.inc | 2 + 4 files changed, 165 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f6c34ccc4b045..71983a7d7c715 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -49521,8 +49521,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; - // VCOMXSS simplifies conditional code sequence into single setcc node. - // Earlier until COMI, it required upto 2 SETCC's to test CC. + // VUCOMXSS simplifies conditional code sequence into single setcc + // node. Earlier, with COMI, it required up to 2 SETCC's to test CC. if (Subtarget.hasAVX10_2()) { return getSETCC( ((cc0 == X86::COND_E) ?
X86::COND_E : X86::COND_NE), diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index f968789772838..367fd67bec535 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1583,6 +1583,12 @@ multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { + defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, + "vucomxsd", f64mem, loadf64, SSEPackedDouble>, + TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; + defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, + "vucomxsh", f16mem, loadf16, SSEPackedSingle>, + T_MAP5, XD, EVEX_CD8<16, CD8VT1>; defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, "vucomxss", f32mem, loadf32, SSEPackedSingle>, TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll index 29313fc7cc259..62a187c3adc74 100644 --- a/llvm/test/CodeGen/X86/avx10_2-cmp.ll +++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll @@ -2,66 +2,127 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86 -define i1 @oeq(float %x, float %y) { -; X64-LABEL: oeq: +define i1 @hoeq(half %x, half %y) { +; X64-LABEL: hoeq: ; X64: # %bb.0: -; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: vucomxsh %xmm1, %xmm0 ; X64-NEXT: sete %al ; X64-NEXT: retq ; -; X86-LABEL: oeq: +; X86-LABEL: hoeq: ; X86: # %bb.0: -; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: sete %al ; X86-NEXT: retl - %1 = fcmp oeq float %x, %y + %1 = fcmp oeq half %x, %y ret i1 %1 } -define i1 @une(float %x, float %y) { -; X64-LABEL: une: +define i1
@hune(half %x, half %y) { +; X64-LABEL: hune: ; X64: # %bb.0: -; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: vucomxsh %xmm1, %xmm0 ; X64-NEXT: setne %al ; X64-NEXT: retq ; -; X86-LABEL: une: +; X86-LABEL: hune: +; X86: # %bb.0: +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une half %x, %y + ret i1 %1 +} + +define i1 @hoeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: hoeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vucomxsh (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: hoeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load half, ptr %xp + %y = load half, ptr %yp + %1 = fcmp oeq half %x, %y + ret i1 %1 +} + +define i1 @hune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: hune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vucomxsh (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: hune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load half, ptr %xp + %y = load half, ptr %yp + %1 = fcmp une half %x, %y + ret i1 %1 +} + +define i1 @foeq(float %x, float %y) { +; X64-LABEL: foeq: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: foeq: ; X86: # %bb.0: ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 -; X86-NEXT: setne %al +; X86-NEXT: sete 
%al ; X86-NEXT: retl - %1 = fcmp une float %x, %y + %1 = fcmp oeq float %x, %y ret i1 %1 } -define i1 @ogt(float %x, float %y) { -; X64-LABEL: ogt: +define i1 @fune(float %x, float %y) { +; X64-LABEL: fune: ; X64: # %bb.0: -; X64-NEXT: vucomiss %xmm1, %xmm0 -; X64-NEXT: seta %al +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: setne %al ; X64-NEXT: retq ; -; X86-LABEL: ogt: +; X86-LABEL: fune: ; X86: # %bb.0: ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0 -; X86-NEXT: seta %al +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al ; X86-NEXT: retl - %1 = fcmp ogt float %x, %y + %1 = fcmp une float %x, %y ret i1 %1 } -define i1 @oeq_mem(ptr %xp, ptr %yp) { -; X64-LABEL: oeq_mem: +define i1 @foeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: foeq_mem: ; X64: # %bb.0: ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: vucomxss (%rsi), %xmm0 ; X64-NEXT: sete %al ; X64-NEXT: retq ; -; X86-LABEL: oeq_mem: +; X86-LABEL: foeq_mem: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -75,15 +136,15 @@ define i1 @oeq_mem(ptr %xp, ptr %yp) { ret i1 %1 } -define i1 @une_mem(ptr %xp, ptr %yp) { -; X64-LABEL: une_mem: +define i1 @fune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: fune_mem: ; X64: # %bb.0: ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: vucomxss (%rsi), %xmm0 ; X64-NEXT: setne %al ; X64-NEXT: retq ; -; X86-LABEL: une_mem: +; X86-LABEL: fune_mem: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -97,25 +158,80 @@ define i1 @une_mem(ptr %xp, ptr %yp) { ret i1 %1 } +define i1 @doeq(double %x, double %y) { +; X64-LABEL: doeq: +; X64: # %bb.0: +; X64-NEXT: vucomxsd %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: doeq: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: 
retl + %1 = fcmp oeq double %x, %y + ret i1 %1 +} -define i1 @ogt_mem(ptr %xp, ptr %yp) { -; X64-LABEL: ogt_mem: +define i1 @dune(double %x, double %y) { +; X64-LABEL: dune: ; X64: # %bb.0: -; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: vucomiss (%rsi), %xmm0 -; X64-NEXT: seta %al +; X64-NEXT: vucomxsd %xmm1, %xmm0 +; X64-NEXT: setne %al ; X64-NEXT: retq ; -; X86-LABEL: ogt_mem: +; X86-LABEL: dune: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une double %x, %y + ret i1 %1 +} + +define i1 @doeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: doeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: vucomxsd (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: doeq_mem: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: vucomiss (%eax), %xmm0 -; X86-NEXT: seta %al +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd (%eax), %xmm0 +; X86-NEXT: sete %al ; X86-NEXT: retl - %x = load float, ptr %xp - %y = load float, ptr %yp - %1 = fcmp ogt float %x, %y + %x = load double, ptr %xp + %y = load double, ptr %yp + %1 = fcmp oeq double %x, %y + ret i1 %1 +} + +define i1 @dune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: dune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: vucomxsd (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: dune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load double, ptr %xp + %y = load double, ptr %yp + %1 = fcmp une double %x, %y ret i1 %1 } diff --git a/llvm/test/TableGen/x86-fold-tables.inc 
b/llvm/test/TableGen/x86-fold-tables.inc index e444c61354abe..fd6ee37d27e14 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1959,7 +1959,9 @@ static const X86FoldTableEntry Table1[] = { {X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE}, {X86::VUCOMISSrr, X86::VUCOMISSrm, 0}, {X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSDZrr, X86::VUCOMXSDZrm, 0}, {X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSHZrr, X86::VUCOMXSHZrm, 0}, {X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE}, {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0}, {X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE},