@@ -59,6 +59,28 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
5959 return maxnum (Src0, Src1);
6060}
6161
62+ enum class KnownIEEEMode { Unknown, On, Off };
63+
64+ // / Return KnownIEEEMode::On if we know if the use context can assume
65+ // / "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
66+ // / "amdgpu-ieee"="false".
67+ static KnownIEEEMode fpenvIEEEMode (const Instruction &I,
68+ const GCNSubtarget &ST) {
69+ if (!ST.hasIEEEMode ()) // Only mode on gfx12
70+ return KnownIEEEMode::On;
71+
72+ const Function *F = I.getFunction ();
73+ if (!F)
74+ return KnownIEEEMode::Unknown;
75+
76+ Attribute IEEEAttr = F->getFnAttribute (" amdgpu-ieee" );
77+ if (IEEEAttr.isValid ())
78+ return IEEEAttr.getValueAsBool () ? KnownIEEEMode::On : KnownIEEEMode::Off;
79+
80+ return AMDGPU::isShader (F->getCallingConv ()) ? KnownIEEEMode::Off
81+ : KnownIEEEMode::On;
82+ }
83+
6284// Check if a value can be converted to a 16-bit value without losing
6385// precision.
6486// The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -843,9 +865,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
843865 break ;
844866 }
845867 case Intrinsic::amdgcn_fmed3: {
846- // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
847- // for the shader.
848-
849868 Value *Src0 = II.getArgOperand (0 );
850869 Value *Src1 = II.getArgOperand (1 );
851870 Value *Src2 = II.getArgOperand (2 );
@@ -858,16 +877,85 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
858877 if (II.isStrictFP ())
859878 break ;
860879
880+ // med3 with a nan input acts like
881+ // v_min_f32(v_min_f32(s0, s1), s2)
882+ //
883+ // Signalingness is ignored with ieee=0, so we fold to
884+ // minimumnum/maximumnum. With ieee=1, the v_min_f32 acts like llvm.minnum
885+ // with signaling nan handling. With ieee=0, like llvm.minimumnum except a
886+ // returned signaling nan will not be quieted.
887+
888+ // ieee=1
889+ // s0 snan: s2
890+ // s1 snan: s2
891+ // s2 snan: qnan
892+
893+ // s0 qnan: min(s1, s2)
894+ // s1 qnan: min(s0, s2)
895+ // s2 qnan: min(s0, s1)
896+
897+ // ieee=0
898+ // s0 _nan: min(s1, s2)
899+ // s1 _nan: min(s0, s2)
900+ // s2 _nan: min(s0, s1)
901+
861902 // Checking for NaN before canonicalization provides better fidelity when
862903 // mapping other operations onto fmed3 since the order of operands is
863904 // unchanged.
864905 Value *V = nullptr ;
865- if (match (Src0, PatternMatch::m_NaN ()) || isa<UndefValue>(Src0)) {
866- V = IC.Builder .CreateMinNum (Src1, Src2);
867- } else if (match (Src1, PatternMatch::m_NaN ()) || isa<UndefValue>(Src1)) {
868- V = IC.Builder .CreateMinNum (Src0, Src2);
869- } else if (match (Src2, PatternMatch::m_NaN ()) || isa<UndefValue>(Src2)) {
870- V = IC.Builder .CreateMinNum (Src0, Src1);
906+ const APFloat *ConstSrc0 = nullptr ;
907+ const APFloat *ConstSrc1 = nullptr ;
908+ const APFloat *ConstSrc2 = nullptr ;
909+
910+ // TODO: Also can fold to 2 operands with infinities.
911+ if ((match (Src0, m_APFloat (ConstSrc0)) && ConstSrc0->isNaN ()) ||
912+ isa<UndefValue>(Src0)) {
913+ switch (fpenvIEEEMode (II, *ST)) {
914+ case KnownIEEEMode::On:
915+ // TODO: If Src2 is snan, does it need quieting?
916+ if (ConstSrc0 && ConstSrc0->isSignaling ())
917+ return IC.replaceInstUsesWith (II, Src2);
918+ V = IC.Builder .CreateMinNum (Src1, Src2);
919+ break ;
920+ case KnownIEEEMode::Off:
921+ V = IC.Builder .CreateMinimumNum (Src1, Src2);
922+ break ;
923+ case KnownIEEEMode::Unknown:
924+ break ;
925+ }
926+ } else if ((match (Src1, m_APFloat (ConstSrc1)) && ConstSrc1->isNaN ()) ||
927+ isa<UndefValue>(Src1)) {
928+ switch (fpenvIEEEMode (II, *ST)) {
929+ case KnownIEEEMode::On:
930+ // TODO: If Src2 is snan, does it need quieting?
931+ if (ConstSrc1 && ConstSrc1->isSignaling ())
932+ return IC.replaceInstUsesWith (II, Src2);
933+
934+ V = IC.Builder .CreateMinNum (Src0, Src2);
935+ break ;
936+ case KnownIEEEMode::Off:
937+ V = IC.Builder .CreateMinimumNum (Src0, Src2);
938+ break ;
939+ case KnownIEEEMode::Unknown:
940+ break ;
941+ }
942+ } else if ((match (Src2, m_APFloat (ConstSrc2)) && ConstSrc2->isNaN ()) ||
943+ isa<UndefValue>(Src2)) {
944+ switch (fpenvIEEEMode (II, *ST)) {
945+ case KnownIEEEMode::On:
946+ if (ConstSrc2 && ConstSrc2->isSignaling ()) {
947+ auto *Quieted = ConstantFP::get (II.getType (), ConstSrc2->makeQuiet ());
948+ return IC.replaceInstUsesWith (II, Quieted);
949+ }
950+
951+ V = IC.Builder .CreateMinNum (Src0, Src1);
952+ break ;
953+ case KnownIEEEMode::Off:
954+ V = IC.Builder .CreateMaximumNum (Src0, Src1);
955+ break ;
956+ case KnownIEEEMode::Unknown:
957+ break ;
958+ }
871959 }
872960
873961 if (V) {
0 commit comments