Skip to content

Conversation

@jayfoad
Copy link
Contributor

@jayfoad jayfoad commented Feb 13, 2025

This removes some of the complexity added by ad6cd7e by setting
OtherPredicates outside MadFmaMixPats rather than inside it.

This removes some of the complexity added by ad6cd7e by setting
OtherPredicates outside MadFmaMixPats rather than inside it.
@llvmbot
Copy link
Member

llvmbot commented Feb 13, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

This removes some of the complexity added by ad6cd7e by setting
OtherPredicates outside MadFmaMixPats rather than inside it.


Full diff: https://github.com/llvm/llvm-project/pull/127044.diff

1 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+8-11)
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5e825e7259a95..21898da1912f5 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -154,12 +154,10 @@ defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_
 multiclass MadFmaMixPats<SDPatternOperator fma_like,
                          Instruction mix_inst,
                          Instruction mixlo_inst,
-                         Instruction mixhi_inst,
-                         bit HasFP32Denormals> {
+                         Instruction mixhi_inst> {
   // At least one of the operands needs to be an fpextend of an f16
   // for this to be worthwhile, so we need three patterns here.
   // TODO: Could we use a predicate to inspect src1/2/3 instead?
-  let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]) in {
   def : GCNPat <
     (f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
                    (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
@@ -228,13 +226,12 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
                 DSTCLAMP.NONE,
                 (i32 (IMPLICIT_DEF)))
   >;
-  } // End OtherPredicates
 
   // FIXME: Special case handling for maxhi (especially for clamp)
   // because dealing with the write to high half of the register is
   // difficult.
   foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
-  let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = p in {
+  let True16Predicate = p in {
 
   def : GCNPat <
     (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
@@ -260,9 +257,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
                        VGPR_32:$elt0))
   >;
 
-  } // end OtherPredicates
+  } // end True16Predicate
 
-  let OtherPredicates = !if(HasFP32Denormals, [TruePredicate], [NoFP32Denormals]), True16Predicate = UseRealTrue16Insts in {
+  let True16Predicate = UseRealTrue16Insts in {
   def : GCNPat <
     (build_vector (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                             (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
@@ -297,7 +294,7 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
                        DSTCLAMP.ENABLE,
                        (REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
   >;
-  } // end OtherPredicates
+  } // end True16Predicate
 }
 
 class MinimumMaximumByMinimum3Maximum3VOP3P<SDPatternOperator node,
@@ -330,9 +327,9 @@ defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F
 }
 } // End FPDPRounding = 1
 }
-} // OtherPredicates = [NoFP32Denormals]
 
-defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16, 0 /*HasFP32Denormals*/>;
+defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
+} // OtherPredicates = [NoFP32Denormals]
 } // End SubtargetPredicate = HasMadMixInsts
 
 
@@ -353,7 +350,7 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
 } // End FPDPRounding = 1
 }
 
-defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16, 1 /*HasPF32Denormals*/>;
+defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
 }
 
 // Defines patterns that extract signed 4bit from each Idx[0].

Copy link
Contributor

@Sisyph Sisyph left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. The previous style was a relic from before we had True16Predicate.

@jayfoad jayfoad merged commit 3e54964 into llvm:main Feb 13, 2025
10 checks passed
@jayfoad jayfoad deleted the simplify-madfmamixpats branch February 13, 2025 14:51
joaosaffran pushed a commit to joaosaffran/llvm-project that referenced this pull request Feb 14, 2025
…vm#127044)

This removes some of the complexity added by ad6cd7e by setting
OtherPredicates outside MadFmaMixPats rather than inside it.
sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request Feb 24, 2025
…vm#127044)

This removes some of the complexity added by ad6cd7e by setting
OtherPredicates outside MadFmaMixPats rather than inside it.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants