From 09d1f74552c1f07ff9d7e1151b8e6e05a1801093 Mon Sep 17 00:00:00 2001 From: mattarde Date: Wed, 23 Oct 2024 22:29:30 -0700 Subject: [PATCH 01/12] update compare opt --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 ++++++++++++ llvm/lib/Target/X86/X86InstrAVX10.td | 28 +++++++++++++++++++++++++ llvm/test/TableGen/x86-fold-tables.inc | 3 +++ 3 files changed, 44 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bcb84add65d83..062d4baf99ffa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1056,6 +1056,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } + if(Subtarget.hasAVX10_2_512()){ + for (auto FVT : { MVT::f16, MVT::f32, MVT::f64 }) { + setOperationAction(ISD::SETCC, FVT, Custom); + } + } // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) { @@ -49520,6 +49525,14 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; + // VCOMXSS simplifies conditional code sequence into single setcc + // node. Earlier until COMI, it required upto 2 SETCC's to test CC. + if (Subtarget.hasAVX10_2()) { + return getSETCC( + ((cc0 == X86::COND_E) ? X86::COND_E : X86::COND_NE), + DAG.getNode(X86ISD::UCOMX, DL, MVT::i32, CMP00, CMP01), DL, + DAG); + } if (Subtarget.hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 625f2e01d4721..c67ef49940e51 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_ //------------------------------------------------- // AVX10 COMEF instructions //------------------------------------------------- +multiclass avx10_com_ef Opc, RegisterClass RC, ValueType VT, + SDPatternOperator OpNode, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + Domain d, X86FoldableSchedWrite sched = WriteFComX>{ + let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in { + def rr : AVX512, + EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; + let mayLoad = 1 in { + def rm : AVX512, + EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; + } + } +} + multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, string OpcodeStr, Domain d, @@ -1564,6 +1582,16 @@ multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, } let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { + + defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, + "vucomxsd", f64mem, loadf64, SSEPackedSingle>, + TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; + defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, + "vucomxsh", f16mem, loadf16, SSEPackedSingle>, + T_MAP5, XD, EVEX_CD8<16, CD8VT1>; + defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, + "vucomxss", f32mem, loadf32, SSEPackedSingle>, + TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, "vcomxsd", SSEPackedDouble>, TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 85d9b02ac0cbf..fd6ee37d27e14 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1959,8 +1959,11 @@ static const X86FoldTableEntry Table1[] = { {X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE}, {X86::VUCOMISSrr, X86::VUCOMISSrm, 0}, {X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSDZrr, X86::VUCOMXSDZrm, 0}, {X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSHZrr, X86::VUCOMXSHZrm, 0}, {X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0}, {X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE}, {X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0}, {X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0}, From 44baff27402285d5294935a2ca8f2edccc992d79 Mon Sep 17 00:00:00 2001 From: mattarde Date: Wed, 23 Oct 2024 22:29:54 -0700 Subject: [PATCH 02/12] add test --- llvm/test/CodeGen/X86/avx10_2-cmp.ll | 237 +++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 llvm/test/CodeGen/X86/avx10_2-cmp.ll diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll new file mode 100644 index 0000000000000..62a187c3adc74 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86 + +define i1 @hoeq(half %x, half %y) { +; X64-LABEL: hoeq: +; X64: # %bb.0: +; X64-NEXT: vucomxsh %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: hoeq: +; X86: # %bb.0: +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq half %x, %y + ret i1 %1 +} + +define i1 @hune(half %x, half %y) { +; X64-LABEL: hune: +; X64: # %bb.0: +; X64-NEXT: vucomxsh %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: hune: +; X86: # %bb.0: +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une half %x, %y + ret i1 %1 +} + +define i1 @hoeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: hoeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vucomxsh (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: hoeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load half, ptr %xp + %y = load half, ptr %yp + %1 = fcmp oeq half %x, %y + ret i1 %1 +} + +define i1 @hune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: hune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vucomxsh (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: hune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load half, ptr %xp + %y = load half, ptr %yp + %1 = fcmp une half %x, %y + ret i1 %1 +} + +define i1 @foeq(float %x, float %y) { +; X64-LABEL: foeq: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: foeq: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq float %x, %y + ret i1 %1 +} + +define i1 @fune(float %x, float %y) { +; X64-LABEL: fune: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: fune: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une float %x, %y + ret i1 %1 +} + +define i1 @foeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: foeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vucomxss (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: foeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp oeq float %x, %y + ret i1 %1 +} + +define i1 @fune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: fune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vucomxss (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: fune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp une float %x, %y + ret i1 %1 +} + +define i1 @doeq(double %x, double %y) { +; X64-LABEL: doeq: +; X64: # %bb.0: +; X64-NEXT: vucomxsd %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: doeq: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq double %x, %y + ret i1 %1 +} + +define i1 @dune(double %x, double %y) { +; X64-LABEL: dune: +; X64: # %bb.0: +; X64-NEXT: vucomxsd %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: dune: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une double %x, %y + ret i1 %1 +} + +define i1 @doeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: doeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: vucomxsd (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: doeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load double, ptr %xp + %y = load double, ptr %yp + %1 = fcmp oeq double %x, %y + ret i1 %1 +} + +define i1 @dune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: dune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: vucomxsd (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: dune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load double, ptr %xp + %y = load double, ptr %yp + %1 = fcmp une double %x, %y + ret i1 %1 +} From d242ba8c2e728375c43270a3ba6672b62ea51265 Mon Sep 17 00:00:00 2001 From: mattarde Date: Thu, 24 Oct 2024 03:32:18 -0700 Subject: [PATCH 03/12] update mod --- llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 062d4baf99ffa..6792885a405a5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -219,8 +219,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // SETOEQ and SETUNE require checking two conditions. for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) { - setCondCodeAction(ISD::SETOEQ, VT, Expand); - setCondCodeAction(ISD::SETUNE, VT, Expand); + setCondCodeAction(ISD::SETOEQ, VT, Subtarget.hasAVX10_2_512() ? Custom : Expand); + setCondCodeAction(ISD::SETUNE, VT, Subtarget.hasAVX10_2_512() ? Custom : Expand); } // Integer absolute. @@ -1056,11 +1056,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if(Subtarget.hasAVX10_2_512()){ - for (auto FVT : { MVT::f16, MVT::f32, MVT::f64 }) { - setOperationAction(ISD::SETCC, FVT, Custom); - } - } // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) { @@ -2447,7 +2442,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } } - + if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); @@ -24078,6 +24073,14 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; } + if(Subtarget.hasAVX10_2_512()){ + if( CC == ISD::SETOEQ || CC == ISD::SETUNE){ + auto NewCC = (CC == ISD:::SETOEQ) ? X86::COND_E : (X86::COND_NE); + return getSETCC(NewCC, + DAG.getNode(X86ISD::UCOMX, + dl, MVT::i32, Op0, Op1), dl, DAG); + } + } // Handle floating point. X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG); if (CondCode == X86::COND_INVALID) From f56ad5c6b823c1f53df16d826cee8c71f29596ff Mon Sep 17 00:00:00 2001 From: mattarde Date: Thu, 24 Oct 2024 06:21:00 -0700 Subject: [PATCH 04/12] add legal CC in 10.2 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 35 +++++++++++-------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6792885a405a5..42c8175ccc92c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -218,10 +218,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::f64, MVT::f32, Expand); // SETOEQ and SETUNE require checking two conditions. - for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) { - setCondCodeAction(ISD::SETOEQ, VT, Subtarget.hasAVX10_2_512() ? Custom : Expand); - setCondCodeAction(ISD::SETUNE, VT, Subtarget.hasAVX10_2_512() ? Custom : Expand); + for (auto VT : {MVT::f32, MVT::f64}) { + setCondCodeAction(ISD::SETOEQ, VT, + Subtarget.hasAVX10_2() ? Custom : Expand); + setCondCodeAction(ISD::SETUNE, VT, + Subtarget.hasAVX10_2() ? Custom : Expand); } + setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand); + setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); // Integer absolute. if (Subtarget.canUseCMOV()) { @@ -2292,8 +2296,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); - setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); - setCondCodeAction(ISD::SETUNE, MVT::f16, Expand); + setCondCodeAction(ISD::SETOEQ, MVT::f16, + Subtarget.hasAVX10_2() ? Custom : Expand); + setCondCodeAction(ISD::SETUNE, MVT::f16, + Subtarget.hasAVX10_2() ? Custom : Expand); if (Subtarget.useAVX512Regs()) { setGroup(MVT::v32f16); @@ -24073,12 +24079,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; } - if(Subtarget.hasAVX10_2_512()){ - if( CC == ISD::SETOEQ || CC == ISD::SETUNE){ - auto NewCC = (CC == ISD:::SETOEQ) ? X86::COND_E : (X86::COND_NE); - return getSETCC(NewCC, - DAG.getNode(X86ISD::UCOMX, - dl, MVT::i32, Op0, Op1), dl, DAG); + if (Subtarget.hasAVX10_2_512()) { + if (CC == ISD::SETOEQ || CC == ISD::SETUNE) { + auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE); + return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1), + dl, DAG); } } // Handle floating point. @@ -49528,14 +49533,6 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; - // VCOMXSS simplifies conditional code sequence into single setcc - // node. Earlier until COMI, it required upto 2 SETCC's to test CC. - if (Subtarget.hasAVX10_2()) { - return getSETCC( - ((cc0 == X86::COND_E) ? X86::COND_E : X86::COND_NE), - DAG.getNode(X86ISD::UCOMX, DL, MVT::i32, CMP00, CMP01), DL, - DAG); - } if (Subtarget.hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, From cdd0050d810c397416807387cff22dbcdad5e7d0 Mon Sep 17 00:00:00 2001 From: mattarde Date: Thu, 24 Oct 2024 06:28:58 -0700 Subject: [PATCH 05/12] remove space --- llvm/lib/Target/X86/X86ISelLowering.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 42c8175ccc92c..7ff5f230fbfe6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2448,7 +2448,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } } - if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); From 6aa7edfe5694d3de8dde99a552dd15acc3312ab9 Mon Sep 17 00:00:00 2001 From: mattarde Date: Thu, 24 Oct 2024 10:45:53 -0700 Subject: [PATCH 06/12] update review comment --- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++-------- llvm/lib/Target/X86/X86InstrAVX10.td | 3 +-- llvm/test/CodeGen/X86/avx10_2-cmp.ll | 4 ++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7ff5f230fbfe6..0da021e9b533d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -218,15 +218,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::f64, MVT::f32, Expand); // SETOEQ and SETUNE require checking two conditions. - for (auto VT : {MVT::f32, MVT::f64}) { - setCondCodeAction(ISD::SETOEQ, VT, - Subtarget.hasAVX10_2() ? Custom : Expand); - setCondCodeAction(ISD::SETUNE, VT, - Subtarget.hasAVX10_2() ? Custom : Expand); + for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) { + setCondCodeAction(ISD::SETOEQ, VT, Expand); + setCondCodeAction(ISD::SETUNE, VT, Expand); } - setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand); - setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); + if (Subtarget.hasAVX10_2()) { + for (auto VT : {MVT::f32, MVT::f64}) { + setCondCodeAction(ISD::SETOEQ, VT, Custom); + setCondCodeAction(ISD::SETUNE, VT, Custom); + } + } // Integer absolute. if (Subtarget.canUseCMOV()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); @@ -24078,7 +24080,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; } - if (Subtarget.hasAVX10_2_512()) { + if (Subtarget.hasAVX10_2()) { if (CC == ISD::SETOEQ || CC == ISD::SETUNE) { auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE); return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1), diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index c67ef49940e51..1a1255532b773 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1582,9 +1582,8 @@ multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, } let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { - defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, - "vucomxsd", f64mem, loadf64, SSEPackedSingle>, + "vucomxsd", f64mem, loadf64, SSEPackedDouble>, TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, "vucomxsh", f16mem, loadf16, SSEPackedSingle>, diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll index 62a187c3adc74..de0bec7ea2695 100644 --- a/llvm/test/CodeGen/X86/avx10_2-cmp.ll +++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86 define i1 @hoeq(half %x, half %y) { ; X64-LABEL: hoeq: From 92c94714bd3504e03e04516ce87585ccd554a6bd Mon Sep 17 00:00:00 2001 From: mattarde Date: Thu, 24 Oct 2024 22:25:18 -0700 Subject: [PATCH 07/12] remove space --- llvm/lib/Target/X86/X86ISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0da021e9b533d..d691346020a02 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2450,6 +2450,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } } + if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); From da9493cccf77f1cce2388958ee1c5272aca44afa Mon Sep 17 00:00:00 2001 From: mattarde Date: Sun, 27 Oct 2024 21:45:58 -0700 Subject: [PATCH 08/12] update review comments faulty commit --- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d691346020a02..cd17ad5571572 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -224,7 +224,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (Subtarget.hasAVX10_2()) { - for (auto VT : {MVT::f32, MVT::f64}) { + for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) { setCondCodeAction(ISD::SETOEQ, VT, Custom); setCondCodeAction(ISD::SETUNE, VT, Custom); } @@ -2298,10 +2298,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); - setCondCodeAction(ISD::SETOEQ, MVT::f16, - Subtarget.hasAVX10_2() ? Custom : Expand); - setCondCodeAction(ISD::SETUNE, MVT::f16, - Subtarget.hasAVX10_2() ? Custom : Expand); + setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); + setCondCodeAction(ISD::SETUNE, MVT::f16, Expand); if (Subtarget.useAVX512Regs()) { setGroup(MVT::v32f16); From 1f7b326b313d5d52bc9d8c5d1249a7063aad43a9 Mon Sep 17 00:00:00 2001 From: mattarde Date: Mon, 28 Oct 2024 00:28:11 -0700 Subject: [PATCH 09/12] Revert "update review comments faulty commit" This reverts commit da9493cccf77f1cce2388958ee1c5272aca44afa. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cd17ad5571572..d691346020a02 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -224,7 +224,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (Subtarget.hasAVX10_2()) { - for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) { + for (auto VT : {MVT::f32, MVT::f64}) { setCondCodeAction(ISD::SETOEQ, VT, Custom); setCondCodeAction(ISD::SETUNE, VT, Custom); } @@ -2298,8 +2298,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); - setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); - setCondCodeAction(ISD::SETUNE, MVT::f16, Expand); + setCondCodeAction(ISD::SETOEQ, MVT::f16, + Subtarget.hasAVX10_2() ? Custom : Expand); + setCondCodeAction(ISD::SETUNE, MVT::f16, + Subtarget.hasAVX10_2() ? Custom : Expand); if (Subtarget.useAVX512Regs()) { setGroup(MVT::v32f16); From 55edbe36af8b134b9e837277989f5855b27aef78 Mon Sep 17 00:00:00 2001 From: mattarde Date: Tue, 29 Oct 2024 05:58:50 -0700 Subject: [PATCH 10/12] update review comments --- llvm/lib/Target/X86/X86ISelLowering.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d691346020a02..d6eac8b1c6764 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -223,12 +223,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setCondCodeAction(ISD::SETUNE, VT, Expand); } - if (Subtarget.hasAVX10_2()) { - for (auto VT : {MVT::f32, MVT::f64}) { - setCondCodeAction(ISD::SETOEQ, VT, Custom); - setCondCodeAction(ISD::SETUNE, VT, Custom); - } - } // Integer absolute. if (Subtarget.canUseCMOV()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); @@ -2298,10 +2292,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); - setCondCodeAction(ISD::SETOEQ, MVT::f16, - Subtarget.hasAVX10_2() ? Custom : Expand); - setCondCodeAction(ISD::SETUNE, MVT::f16, - Subtarget.hasAVX10_2() ? Custom : Expand); + setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); + setCondCodeAction(ISD::SETUNE, MVT::f16, Expand); if (Subtarget.useAVX512Regs()) { setGroup(MVT::v32f16); @@ -2449,6 +2441,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCC, VT, Custom); } } + for (auto VT : { MVT::f32, MVT::f64}) { + setCondCodeAction(ISD::SETOEQ, VT, Custom); + setCondCodeAction(ISD::SETUNE, VT, Custom); + } } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { From 9f2241b3a3509099737ce56d0052169c9778d63d Mon Sep 17 00:00:00 2001 From: mattarde Date: Tue, 29 Oct 2024 06:01:24 -0700 Subject: [PATCH 11/12] add f16 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d6eac8b1c6764..711c8f65b0a2a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2441,7 +2441,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCC, VT, Custom); } } - for (auto VT : { MVT::f32, MVT::f64}) { + for (auto VT : { MVT::f16, MVT::f32, MVT::f64}) { setCondCodeAction(ISD::SETOEQ, VT, Custom); setCondCodeAction(ISD::SETUNE, VT, Custom); } From 857bbe39fd4ccf6824eea28855d209fe264daa86 Mon Sep 17 00:00:00 2001 From: mattarde Date: Tue, 29 Oct 2024 09:52:39 -0700 Subject: [PATCH 12/12] remove space --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 711c8f65b0a2a..24dae783d1530 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2441,7 +2441,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCC, VT, Custom); } } - for (auto VT : { MVT::f16, MVT::f32, MVT::f64}) { + for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) { setCondCodeAction(ISD::SETOEQ, VT, Custom); setCondCodeAction(ISD::SETUNE, VT, Custom); }