From a5e1b79a42366fa4ba1c5cc586c7e92cf3c442b3 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 17 Mar 2025 14:35:11 +0800 Subject: [PATCH] [SDAG] Handle extract_subvector in isKnownNeverNaN Propagate nnan across extract_subvector. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +- llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 94 +++++++------------ .../RISCV/rvv/fixed-vectors-fmaximum.ll | 20 ++-- .../RISCV/rvv/fixed-vectors-fminimum.ll | 20 ++-- 4 files changed, 52 insertions(+), 85 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index df30148b78b65..0b2d182375881 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5723,7 +5723,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); } - case ISD::EXTRACT_VECTOR_ELT: { + case ISD::EXTRACT_VECTOR_ELT: + case ISD::EXTRACT_SUBVECTOR: { return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); } case ISD::BUILD_VECTOR: { diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll index 9949b823dfec1..15cb404a3840a 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -1287,55 +1287,55 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s ; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_postcvt: ; SDAG-GFX1100-TRUE16: ; %bb.0: ; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l -; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v7.l ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l +; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l +; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l +; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v10.l, v3.l +; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v11.l, v5.l ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) -; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] -; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v6, v7, v4 op_sel_hi:[1,1,1] +; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v6, v7, v8 op_sel_hi:[1,1,1] clamp +; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v9, v10, v11 op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v2, v2 clamp -; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 clamp +; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; SDAG-GFX1100-TRUE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 ; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32_clamp_postcvt: ; SDAG-GFX1100-FAKE16: ; %bb.0: ; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] -; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp ; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp -; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v7, v7 clamp +; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; SDAG-GFX1100-FAKE16-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 ; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; SDAG-GFX900: ; %bb.0: -; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] -; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] -; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp -; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v7, v7 clamp -; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mov_b32_e32 v0, v6 +; GFX900-NEXT: v_mov_b32_e32 v1, v2 +; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; SDAG-GFX906: ; %bb.0: -; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] -; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] -; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp -; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v7, v7 clamp -; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX906-NEXT: v_mov_b32_e32 v0, v6 +; GFX906-NEXT: v_mov_b32_e32 v1, v2 +; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt: ; SDAG-VI: ; %bb.0: @@ -1426,28 +1426,6 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s ; GISEL-GFX1100-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 ; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; GISEL-GFX900: ; %bb.0: -; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; GISEL-GFX906: ; %bb.0: -; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6 -; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v2 -; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt: ; GISEL-VI: ; %bb.0: ; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll index e17ad303eddb8..04e73ac1ea956 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -290,17 +290,14 @@ define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) { ret <2 x half> %v } -; FIXME: The nnan from fadd isn't propagating. define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv_nnana: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; ZVFH-NEXT: vmfeq.vv v0, v9, v9 -; ZVFH-NEXT: vfadd.vv v8, v8, v8 -; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0 -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0 -; ZVFH-NEXT: vfmax.vv v8, v10, v8 +; ZVFH-NEXT: vmv1r.v v10, v9 +; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t +; ZVFH-NEXT: vfmax.vv v8, v10, v9 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana: @@ -327,16 +324,13 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ret <2 x half> %v } -; FIXME: The nnan from fadd isn't propagating. define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv_nnanb: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vfadd.vv v9, v9, v9 -; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 -; ZVFH-NEXT: vmfeq.vv v0, v9, v9 -; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vmv1r.v v10, v8 +; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t ; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll index 1362055c4dabf..a0334a9a5d20a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -290,17 +290,14 @@ define <2 x half> @vfmin_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) { ret <2 x half> %v } -; FIXME: The nnan from fadd isn't propagating. define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv_nnana: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; ZVFH-NEXT: vmfeq.vv v0, v9, v9 -; ZVFH-NEXT: vfadd.vv v8, v8, v8 -; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0 -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0 -; ZVFH-NEXT: vfmin.vv v8, v10, v8 +; ZVFH-NEXT: vmv1r.v v10, v9 +; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t +; ZVFH-NEXT: vfmin.vv v8, v10, v9 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnana: @@ -327,16 +324,13 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ret <2 x half> %v } -; FIXME: The nnan from fadd isn't propagating. define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv_nnanb: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vfadd.vv v9, v9, v9 -; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 -; ZVFH-NEXT: vmfeq.vv v0, v9, v9 -; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vmv1r.v v10, v8 +; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t ; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ;