diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e7f6032ee7d74..a0514e93d6598 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2442,6 +2442,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSQRT, VT, Legal); setOperationAction(ISD::FMA, VT, Legal); setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::FMINIMUM, VT, Custom); + setOperationAction(ISD::FMAXIMUM, VT, Custom); } if (Subtarget.hasAVX10_2_512()) { setOperationAction(ISD::FADD, MVT::v32bf16, Legal); @@ -2451,6 +2453,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal); setOperationAction(ISD::FMA, MVT::v32bf16, Legal); setOperationAction(ISD::SETCC, MVT::v32bf16, Custom); + setOperationAction(ISD::FMINIMUM, MVT::v32bf16, Custom); + setOperationAction(ISD::FMAXIMUM, MVT::v32bf16, Custom); } for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) { setCondCodeAction(ISD::SETOEQ, VT, Custom); @@ -28842,6 +28846,20 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget, SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); SDLoc DL(Op); + if (Subtarget.hasAVX10_2() && TLI.isTypeLegal(VT)) { + unsigned Opc = 0; + if (VT.isVector()) + Opc = X86ISD::VMINMAX; + else if (VT == MVT::f16 || VT == MVT::f32 || VT == MVT::f64) + Opc = X86ISD::VMINMAXS; + + if (Opc) { + SDValue Imm = + DAG.getTargetConstant(Op.getOpcode() == ISD::FMAXIMUM, DL, MVT::i32); + return DAG.getNode(Opc, DL, VT, X, Y, Imm, Op->getFlags()); + } + } + uint64_t SizeInBits = VT.getScalarSizeInBits(); APInt PreferredZero = APInt::getZero(SizeInBits); APInt OppositeZero = PreferredZero; diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 0301c07dfb540..3bc64eda01a9c 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ 
b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -403,28 +403,42 @@ multiclass avx10_minmax_scalar { let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in { let mayRaiseFPException = 1 in { - defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst), - (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3), - OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 timm:$src3)))>, - Sched<[WriteFMAX]>; - - defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst), - (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), - OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), - (i32 timm:$src3)))>, + let isCodeGenOnly = 1 in { + def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), + !strconcat(OpStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, (i32 timm:$src3)))]>, + Sched<[WriteFMAX]>; + + def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), + !strconcat(OpStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2), + (i32 timm:$src3)))]>, + Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; + } + defm rri_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3), + OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), + (i32 timm:$src3)))>, + Sched<[WriteFMAX]>; + + defm rmi_Int : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst), + (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), + OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), + (i32 timm:$src3)))>, Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; } let 
Uses = [], mayRaiseFPException = 0 in - defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst), - (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3), - OpStr, "$src3, {sae}, $src2, $src1", - "$src1, $src2, {sae}, $src3", - (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 timm:$src3)))>, - Sched<[WriteFMAX]>, EVEX_B; + defm rrib_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3), + OpStr, "$src3, {sae}, $src2, $src1", + "$src1, $src2, {sae}, $src3", + (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), + (i32 timm:$src3)))>, + Sched<[WriteFMAX]>, EVEX_B; } } diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll index c6da0c5ca4792..1dcce5336895f 100644 --- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86 declare float @llvm.maximum.f32(float, float) @@ -73,6 +74,11 @@ define float @test_fmaximum(float %x, float %y) nounwind { ; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -110,6 +116,11 @@ define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no- ; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; +; 
AVX10_2-LABEL: test_fmaximum_scalarize: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_scalarize: ; X86: # %bb.0: ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 @@ -129,6 +140,11 @@ define float @test_fmaximum_nan0(float %x, float %y) { ; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_nan0: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_nan0: ; X86: # %bb.0: ; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} @@ -148,6 +164,11 @@ define float @test_fmaximum_nan1(float %x, float %y) { ; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_nan1: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_nan1: ; X86: # %bb.0: ; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} @@ -215,6 +236,13 @@ define float @test_fmaximum_nnan(float %x, float %y) nounwind { ; AVX512DQ-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512DQ-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_nnan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vaddss %xmm1, %xmm0, %xmm2 +; AVX10_2-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: vminmaxss $1, %xmm0, %xmm2, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_nnan: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -272,6 +300,12 @@ define double @test_fmaximum_zero0(double %x, double %y) nounwind { ; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_zero0: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX10_2-NEXT: vminmaxsd $1, %xmm0, %xmm1, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_zero0: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -323,6 +357,12 @@ define double @test_fmaximum_zero1(double %x, double %y) nounwind { ; AVX512-NEXT: vmovapd %xmm1, %xmm0 
; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_zero1: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX10_2-NEXT: vminmaxsd $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_zero1: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -354,6 +394,11 @@ define double @test_fmaximum_zero2(double %x, double %y) { ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_zero2: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_zero2: ; X86: # %bb.0: ; X86-NEXT: fldz @@ -390,6 +435,11 @@ define float @test_fmaximum_nsz(float %x, float %y) "no-signed-zeros-fp-math"="t ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_nsz: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_nsz: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -474,6 +524,12 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind { ; AVX512DQ-NEXT: vmaxss %xmm2, %xmm0, %xmm0 ; AVX512DQ-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_combine_cmps: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vdivss %xmm0, %xmm1, %xmm1 +; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_combine_cmps: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -562,6 +618,11 @@ define float @test_fminimum(float %x, float %y) nounwind { ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -599,6 +660,11 @@ define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) " ; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_scalarize: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0 +; 
AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_scalarize: ; X86: # %bb.0: ; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0 @@ -618,6 +684,11 @@ define float @test_fminimum_nan0(float %x, float %y) { ; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_nan0: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_nan0: ; X86: # %bb.0: ; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} @@ -637,6 +708,11 @@ define float @test_fminimum_nan1(float %x, float %y) { ; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_nan1: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_nan1: ; X86: # %bb.0: ; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} @@ -695,6 +771,11 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true" ; AVX512DQ-NEXT: vminsd %xmm2, %xmm1, %xmm0 ; AVX512DQ-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_nnan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxsd $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_nnan: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -749,6 +830,11 @@ define double @test_fminimum_zero0(double %x, double %y) nounwind { ; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_zero0: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxsd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_zero0: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -796,6 +882,11 @@ define double @test_fminimum_zero1(double %x, double %y) nounwind { ; AVX512-NEXT: vmovapd %xmm1, %xmm0 ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_zero1: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxsd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: 
test_fminimum_zero1: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -826,6 +917,11 @@ define double @test_fminimum_zero2(double %x, double %y) { ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0] ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_zero2: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vmovsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0] +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_zero2: ; X86: # %bb.0: ; X86-NEXT: fldz @@ -863,6 +959,11 @@ define float @test_fminimum_nsz(float %x, float %y) nounwind { ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_nsz: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_nsz: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -948,6 +1049,12 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind { ; AVX512DQ-NEXT: vminss %xmm2, %xmm0, %xmm0 ; AVX512DQ-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_combine_cmps: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vdivss %xmm0, %xmm1, %xmm1 +; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_combine_cmps: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -1009,6 +1116,11 @@ define <2 x double> @test_fminimum_vector(<2 x double> %x, <2 x double> %y) { ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_vector: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_vector: ; X86: # %bb.0: ; X86-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2 @@ -1032,6 +1144,11 @@ define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nan ; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_vector: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_vector: ; X86: # %bb.0: ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 @@ -1054,6 +1171,12 @@ 
define <2 x double> @test_fminimum_vector_zero(<2 x double> %x) { ; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_vector_zero: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_vector_zero: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -1077,6 +1200,11 @@ define <4 x float> @test_fmaximum_vector_signed_zero(<4 x float> %x) { ; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_vector_signed_zero: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_vector_signed_zero: ; X86: # %bb.0: ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] @@ -1102,6 +1230,13 @@ define <2 x double> @test_fminimum_vector_partially_zero(<2 x double> %x) { ; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_vector_partially_zero: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_vector_partially_zero: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -1149,6 +1284,13 @@ define <2 x double> @test_fminimum_vector_different_zeros(<2 x double> %x) { ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_vector_different_zeros: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_vector_different_zeros: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -1177,6 +1319,11 @@ define <4 x float> @test_fmaximum_vector_non_zero(<4 x 
float> %x) { ; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_vector_non_zero: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_vector_non_zero: ; X86: # %bb.0: ; X86-NEXT: vmovaps {{.*#+}} xmm1 = [5.0E+0,4.0E+0,3.0E+0,2.0E+0] @@ -1206,6 +1353,13 @@ define <2 x double> @test_fminimum_vector_nan(<2 x double> %x) { ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_vector_nan: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_vector_nan: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -1232,6 +1386,12 @@ define <2 x double> @test_fminimum_vector_zero_first(<2 x double> %x) { ; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_vector_zero_first: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_vector_zero_first: ; X86: # %bb.0: ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -1260,6 +1420,11 @@ define <2 x double> @test_fminimum_vector_signed_zero(<2 x double> %x) { ; AVX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fminimum_vector_signed_zero: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxpd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fminimum_vector_signed_zero: ; X86: # %bb.0: ; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1 @@ -1284,6 +1449,11 @@ define <4 x float> @test_fmaximum_vector_signed_zero_first(<4 x float> %x) { ; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_vector_signed_zero_first: +; AVX10_2: # %bb.0: 
+; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_vector_signed_zero_first: ; X86: # %bb.0: ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] @@ -1314,6 +1484,12 @@ define <4 x float> @test_fmaximum_vector_zero(<4 x float> %x) { ; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_vector_zero: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_vector_zero: ; X86: # %bb.0: ; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -1369,6 +1545,12 @@ define <4 x float> @test_fmaximum_v4f32_splat(<4 x float> %x, float %y) { ; AVX512-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_v4f32_splat: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vbroadcastss %xmm1, %xmm1 +; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_v4f32_splat: ; X86: # %bb.0: ; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1 @@ -1803,6 +1985,11 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind { ; AVX512-NEXT: popq %rbp ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_v4f16: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxph $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_v4f16: ; X86: # %bb.0: ; X86-NEXT: subl $164, %esp @@ -2330,6 +2517,11 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) { ; AVX512-NEXT: .cfi_def_cfa_offset 8 ; AVX512-NEXT: retq ; +; AVX10_2-LABEL: test_fmaximum_v4bf16: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: vminmaxnepbf16 $1, %xmm1, %xmm0, %xmm0 +; AVX10_2-NEXT: retq +; ; X86-LABEL: test_fmaximum_v4bf16: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 
36f6afacdf09d..8cfaa18a5cfac 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -3085,9 +3085,12 @@ static const X86FoldTableEntry Table2[] = { {X86::VMINMAXPSZ128rri, X86::VMINMAXPSZ128rmi, 0}, {X86::VMINMAXPSZ256rri, X86::VMINMAXPSZ256rmi, 0}, {X86::VMINMAXPSZrri, X86::VMINMAXPSZrmi, 0}, - {X86::VMINMAXSDrri, X86::VMINMAXSDrmi, TB_NO_REVERSE}, - {X86::VMINMAXSHrri, X86::VMINMAXSHrmi, TB_NO_REVERSE}, - {X86::VMINMAXSSrri, X86::VMINMAXSSrmi, TB_NO_REVERSE}, + {X86::VMINMAXSDrri, X86::VMINMAXSDrmi, 0}, + {X86::VMINMAXSDrri_Int, X86::VMINMAXSDrmi_Int, TB_NO_REVERSE}, + {X86::VMINMAXSHrri, X86::VMINMAXSHrmi, 0}, + {X86::VMINMAXSHrri_Int, X86::VMINMAXSHrmi_Int, TB_NO_REVERSE}, + {X86::VMINMAXSSrri, X86::VMINMAXSSrmi, 0}, + {X86::VMINMAXSSrri_Int, X86::VMINMAXSSrmi_Int, TB_NO_REVERSE}, {X86::VMINPBF16Z128rr, X86::VMINPBF16Z128rm, 0}, {X86::VMINPBF16Z256rr, X86::VMINPBF16Z256rm, 0}, {X86::VMINPBF16Zrr, X86::VMINPBF16Zrm, 0}, @@ -5131,9 +5134,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMINMAXPSZ128rrikz, X86::VMINMAXPSZ128rmikz, 0}, {X86::VMINMAXPSZ256rrikz, X86::VMINMAXPSZ256rmikz, 0}, {X86::VMINMAXPSZrrikz, X86::VMINMAXPSZrmikz, 0}, - {X86::VMINMAXSDrrikz, X86::VMINMAXSDrmikz, TB_NO_REVERSE}, - {X86::VMINMAXSHrrikz, X86::VMINMAXSHrmikz, TB_NO_REVERSE}, - {X86::VMINMAXSSrrikz, X86::VMINMAXSSrmikz, TB_NO_REVERSE}, + {X86::VMINMAXSDrri_Intkz, X86::VMINMAXSDrmi_Intkz, TB_NO_REVERSE}, + {X86::VMINMAXSHrri_Intkz, X86::VMINMAXSHrmi_Intkz, TB_NO_REVERSE}, + {X86::VMINMAXSSrri_Intkz, X86::VMINMAXSSrmi_Intkz, TB_NO_REVERSE}, {X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmkz, 0}, {X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmkz, 0}, {X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmkz, 0}, @@ -6753,9 +6756,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMINMAXPSZ128rrik, X86::VMINMAXPSZ128rmik, 0}, {X86::VMINMAXPSZ256rrik, X86::VMINMAXPSZ256rmik, 0}, {X86::VMINMAXPSZrrik, X86::VMINMAXPSZrmik, 0}, - {X86::VMINMAXSDrrik, 
X86::VMINMAXSDrmik, TB_NO_REVERSE}, - {X86::VMINMAXSHrrik, X86::VMINMAXSHrmik, TB_NO_REVERSE}, - {X86::VMINMAXSSrrik, X86::VMINMAXSSrmik, TB_NO_REVERSE}, + {X86::VMINMAXSDrri_Intk, X86::VMINMAXSDrmi_Intk, TB_NO_REVERSE}, + {X86::VMINMAXSHrri_Intk, X86::VMINMAXSHrmi_Intk, TB_NO_REVERSE}, + {X86::VMINMAXSSrri_Intk, X86::VMINMAXSSrmi_Intk, TB_NO_REVERSE}, {X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmk, 0}, {X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmk, 0}, {X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmk, 0},