diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aba3c0f80a024..fa8d1bb2d78a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16392,12 +16392,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { return SDValue(); bool AllowMultipleMaybePoisonOperands = - N0.getOpcode() == ISD::SELECT_CC || - N0.getOpcode() == ISD::SETCC || + N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC || N0.getOpcode() == ISD::BUILD_VECTOR || N0.getOpcode() == ISD::BUILD_PAIR || N0.getOpcode() == ISD::VECTOR_SHUFFLE || - N0.getOpcode() == ISD::CONCAT_VECTORS; + N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL; // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all // ones" or "constant" into something that depends on FrozenUndef. We can @@ -16495,7 +16494,17 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { SVN->getMask()); } else { // NOTE: this strips poison generating flags. - R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops); + // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan, + // ninf, nsz, or fast. + // However, contract, reassoc, afn, and arcp should be preserved, + // as these fast-math flags do not introduce poison values. + SDNodeFlags SrcFlags = N0->getFlags(); + SDNodeFlags SafeFlags; + SafeFlags.setAllowContract(SrcFlags.hasAllowContract()); + SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation()); + SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs()); + SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal()); + R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops, SafeFlags); } assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) && "Can't create node that may be undef/poison!"); diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll index a1b2dbda687fb..9a4ab9865369f 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll @@ -17,9 +17,7 @@ define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) { ; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0 +; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %mul = fmul nnan contract afn float %x, %y %mul.fr = freeze float %mul @@ -43,9 +41,7 @@ define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) { ; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0 +; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %mul = fmul nnan contract float %x, %y %mul.fr = freeze float %mul @@ -69,9 +65,7 @@ define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) { ; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_add_f32_e32 v0, -1.0, v0 +; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %mul = fmul nnan contract float %x, %y %mul.fr = freeze float %mul @@ -95,12 +89,42 @@ define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) { ; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0 +; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %mul = fmul nnan contract float %x, %y %mul.fr = freeze float %mul %sub = fsub nnan contract float 1.000000e+00, %mul.fr ret float %sub } + +define float @fma_freeze_sink_multiple_maybe_poison_nnan_add(float %x, float %y) { +; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_dual_subrev_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, 1.0, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %fsub_x = fsub nnan contract float %x, 1.000000e+00 + %fadd_y = fadd nnan contract float %y, 1.000000e+00 + %mul = fmul nnan contract float %fsub_x, %fadd_y + %mul.fr = freeze float %mul + %add = fadd nnan contract float %mul.fr, 1.000000e+00 + ret float %add +} + +define float @fma_freeze_sink_multiple_maybe_poison_nnan_sub(float %x, float %y) { +; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_sub: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_dual_add_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, -1.0, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %fadd_x = fadd nnan contract float %x, 1.000000e+00 + %fsub_y = fsub nnan contract float %y, 1.000000e+00 + %mul = fmul nnan contract float %fadd_x, %fsub_y + %mul.fr = freeze float %mul + %sub = fsub nnan contract float %mul.fr, 1.000000e+00 + ret float %sub +}