address farzon

kmpeng · kmpeng · commit 824225d1f989 · 2025-10-30T20:05:50.000-07:00
diff --git a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp
@@ -71,7 +71,7 @@ void SPIRVCombinerHelper::applySPIRVDistance(MachineInstr &MI) const {
 ///   (vXf32 (g_intrinsic faceforward
 ///             (vXf32 N) (vXf32 I) (vXf32 Ng)))
 ///
-/// This only works for Vulkan targets.
+/// This only works for Vulkan shader targets.
 ///
 bool SPIRVCombinerHelper::matchSelectToFaceForward(MachineInstr &MI) const {
   if (!STI.isShader())
@@ -88,8 +88,11 @@ bool SPIRVCombinerHelper::matchSelectToFaceForward(MachineInstr &MI) const {
   CmpInst::Predicate Pred;
   if (!mi_match(CondReg, MRI,
                 m_GFCmp(m_Pred(Pred), m_Reg(DotReg), m_Reg(CondZeroReg))) ||
-      Pred != CmpInst::FCMP_OLT)
-    return false;
+      !(Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_ULT)) {
+    if (!(Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_UGT))
+      return false;
+    std::swap(DotReg, CondZeroReg);
+  }
 
   // Check if FCMP is a comparison between a dot product and 0.
   MachineInstr *DotInstr = MRI.getVRegDef(DotReg);
@@ -109,29 +112,43 @@ bool SPIRVCombinerHelper::matchSelectToFaceForward(MachineInstr &MI) const {
     return false;
 
   // Check if select's false operand is the negation of the true operand.
-  auto AreNegatedConstants = [&](Register TrueReg, Register FalseReg) {
-    const ConstantFP *TrueVal, *FalseVal;
-    if (!mi_match(TrueReg, MRI, m_GFCst(TrueVal)) ||
-        !mi_match(FalseReg, MRI, m_GFCst(FalseVal)))
+  auto AreNegatedConstantsOrSplats = [&](Register TrueReg, Register FalseReg) {
+    std::optional<FPValueAndVReg> TrueVal, FalseVal;
+    if (!mi_match(TrueReg, MRI, m_GFCstOrSplat(TrueVal)) ||
+        !mi_match(FalseReg, MRI, m_GFCstOrSplat(FalseVal)))
       return false;
-    APFloat TrueValNegated = TrueVal->getValue();
+    APFloat TrueValNegated = TrueVal->Value;
     TrueValNegated.changeSign();
-    return FalseVal->getValue().compare(TrueValNegated) == APFloat::cmpEqual;
+    return FalseVal->Value.compare(TrueValNegated) == APFloat::cmpEqual;
   };
 
-  if (!mi_match(FalseReg, MRI, m_GFNeg(m_SpecificReg(TrueReg))) &&
-      !mi_match(TrueReg, MRI, m_GFNeg(m_SpecificReg(FalseReg)))) {
-    // Check if they're constant opposites.
+  if (!mi_match(TrueReg, MRI, m_GFNeg(m_SpecificReg(FalseReg))) &&
+      !mi_match(FalseReg, MRI, m_GFNeg(m_SpecificReg(TrueReg)))) {
+    std::optional<FPValueAndVReg> MulConstant;
     MachineInstr *TrueInstr = MRI.getVRegDef(TrueReg);
     MachineInstr *FalseInstr = MRI.getVRegDef(FalseReg);
     if (TrueInstr->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
         FalseInstr->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
         TrueInstr->getNumOperands() == FalseInstr->getNumOperands()) {
       for (unsigned I = 1; I < TrueInstr->getNumOperands(); ++I)
-        if (!AreNegatedConstants(TrueInstr->getOperand(I).getReg(),
-                                 FalseInstr->getOperand(I).getReg()))
+        if (!AreNegatedConstantsOrSplats(TrueInstr->getOperand(I).getReg(),
+                                         FalseInstr->getOperand(I).getReg()))
           return false;
-    } else if (!AreNegatedConstants(TrueReg, FalseReg))
+    } else if (mi_match(TrueReg, MRI,
+                        m_GFMul(m_SpecificReg(FalseReg),
+                                m_GFCstOrSplat(MulConstant))) ||
+               mi_match(FalseReg, MRI,
+                        m_GFMul(m_SpecificReg(TrueReg),
+                                m_GFCstOrSplat(MulConstant))) ||
+               mi_match(TrueReg, MRI,
+                        m_GFMul(m_GFCstOrSplat(MulConstant),
+                                m_SpecificReg(FalseReg))) ||
+               mi_match(FalseReg, MRI,
+                        m_GFMul(m_GFCstOrSplat(MulConstant),
+                                m_SpecificReg(TrueReg)))) {
+      if (!MulConstant || !MulConstant->Value.isExactlyValue(-1.0))
+        return false;
+    } else if (!AreNegatedConstantsOrSplats(TrueReg, FalseReg))
       return false;
   }
 
@@ -140,17 +157,28 @@ bool SPIRVCombinerHelper::matchSelectToFaceForward(MachineInstr &MI) const {
 
 void SPIRVCombinerHelper::applySPIRVFaceForward(MachineInstr &MI) const {
   // Extract the operands for N, I, and Ng from the match criteria.
-  Register CondReg, TrueReg, DotReg, DotOperand1, DotOperand2;
-  if (!mi_match(MI.getOperand(0).getReg(), MRI,
-                m_GISelect(m_Reg(CondReg), m_Reg(TrueReg), m_Reg())))
-    return;
-  if (!mi_match(CondReg, MRI, m_GFCmp(m_Pred(), m_Reg(DotReg), m_Reg())))
-    return;
+  Register CondReg = MI.getOperand(1).getReg();
+  MachineInstr *CondInstr = MRI.getVRegDef(CondReg);
+  Register DotReg = CondInstr->getOperand(2).getReg();
+  CmpInst::Predicate Pred = cast<GFCmp>(CondInstr)->getCond();
+  if (Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_UGT)
+    DotReg = CondInstr->getOperand(3).getReg();
   MachineInstr *DotInstr = MRI.getVRegDef(DotReg);
-  if (!mi_match(DotReg, MRI, m_GFMul(m_Reg(DotOperand1), m_Reg(DotOperand2)))) {
+  Register DotOperand1, DotOperand2;
+  if (DotInstr->getOpcode() == TargetOpcode::G_FMUL) {
+    DotOperand1 = DotInstr->getOperand(1).getReg();
+    DotOperand2 = DotInstr->getOperand(2).getReg();
+  } else {
     DotOperand1 = DotInstr->getOperand(2).getReg();
     DotOperand2 = DotInstr->getOperand(3).getReg();
   }
+  Register TrueReg = MI.getOperand(2).getReg();
+  Register FalseReg = MI.getOperand(3).getReg();
+  MachineInstr *TrueInstr = MRI.getVRegDef(TrueReg);
+  if (TrueInstr->getOpcode() == TargetOpcode::G_FNEG ||
+      TrueInstr->getOpcode() == TargetOpcode::G_FMUL)
+    std::swap(TrueReg, FalseReg);
+  MachineInstr *FalseInstr = MRI.getVRegDef(FalseReg);
 
   Register ResultReg = MI.getOperand(0).getReg();
   Builder.setInstrAndDebugLoc(MI);
@@ -159,5 +187,25 @@ void SPIRVCombinerHelper::applySPIRVFaceForward(MachineInstr &MI) const {
       .addUse(DotOperand1)  // I
       .addUse(DotOperand2); // Ng
 
-  MI.eraseFromParent();
+  SPIRVGlobalRegistry *GR =
+      MI.getMF()->getSubtarget<SPIRVSubtarget>().getSPIRVGlobalRegistry();
+  auto RemoveAllUses = [&](Register Reg) {
+    SmallVector<MachineInstr *, 4> UsesToErase;
+    for (auto &UseMI : MRI.use_instructions(Reg))
+      UsesToErase.push_back(&UseMI);
+
+    // Calling eraseFromParent too early invalidates the iterator.
+    for (auto *MIToErase : UsesToErase)
+      MIToErase->eraseFromParent();
+  };
+
+  RemoveAllUses(CondReg); // remove all uses of FCMP Result
+  GR->invalidateMachineInstr(CondInstr);
+  CondInstr->eraseFromParent(); // remove FCMP instruction
+  RemoveAllUses(DotReg);        // remove all uses of spv_fdot/G_FMUL Result
+  GR->invalidateMachineInstr(DotInstr);
+  DotInstr->eraseFromParent(); // remove spv_fdot/G_FMUL instruction
+  RemoveAllUses(FalseReg);
+  GR->invalidateMachineInstr(FalseInstr);
+  FalseInstr->eraseFromParent();
 }
diff --git a/llvm/test/CodeGen/SPIRV/GlobalISel/InstCombine/prelegalizercombiner-select-to-faceforward.mir b/llvm/test/CodeGen/SPIRV/GlobalISel/InstCombine/prelegalizercombiner-select-to-faceforward.mir
@@ -99,7 +99,7 @@ tracksRegLiveness: true
 legalized: true
 body:             |
   bb.1.entry:
-    ; CHECK-LABEL: name: faceforward_instcombine_float4
+    ; CHECK-LABEL: name: faceforward_instcombine_float4_constants
     ; CHECK-NOT: %10:_(s32) = G_FCONSTANT float 0.000000e+00
     ; CHECK-NOT: %16:_(s32) = G_FCONSTANT float -1.000000e+00
     ; CHECK-NOT: %15:_(<4 x s32>) = G_BUILD_VECTOR %16:_(s32), %16:_(s32), %16:_(s32), %16:_(s32)
@@ -130,4 +130,70 @@ body:             |
     %11:_(s1) = G_FCMP floatpred(olt), %9:_(s32), %10:_
     %12:id(<4 x s32>) = G_SELECT %11:_(s1), %13:_, %15:_
     OpReturnValue %12:id(<4 x s32>)
+---
+name:            faceforward_instcombine_float4_false_fmul
+tracksRegLiveness: true
+legalized: true
+body:             |
+  bb.1.entry:
+    ; CHECK-LABEL: name: faceforward_instcombine_float4_false_fmul
+    ; CHECK-NOT: %10:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NOT: %13:_(s32) = G_FCONSTANT float -1.000000e+00
+    ; CHECK-NOT: %12:_(<4 x s32>) = G_BUILD_VECTOR %13:_(s32), %13:_(s32), %13:_(s32), %13:_(s32)
+    ; CHECK-NOT: %9:_(s32) = G_INTRINSIC intrinsic(@llvm.spv.fdot), %1:vfid(<4 x s32>), %2:vfid(<4 x s32>)
+    ; CHECK-NOT: %11:_(s1) = G_FCMP floatpred(olt), %9:_(s32), %10:_
+    ; CHECK-NOT: %14:_(<4 x s32>) = G_FMUL %0:vfid, %12:_
+    ; CHECK-NOT: %15:id(<4 x s32>) = G_SELECT %11:_(s1), %0:vfid, %14:_
+    ; CHECK: %13:id(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.spv.faceforward), %3(<4 x s32>), %4(<4 x s32>), %5(<4 x s32>)
+    ; CHECK: OpReturnValue %13(<4 x s32>)
+    %4:type(s64) = OpTypeVector %3:type(s64), 4
+    %6:type(s64) = OpTypeFunction %4:type(s64), %4:type(s64), %4:type(s64), %4:type(s64)
+    %3:type(s64) = OpTypeFloat 32
+    OpName %0:vfid(<4 x s32>), 97
+    OpName %1:vfid(<4 x s32>), 98
+    OpName %2:vfid(<4 x s32>), 99
+    %5:iid(s64) = OpFunction %4:type(s64), 0, %6:type(s64)
+    %0:vfid(<4 x s32>) = OpFunctionParameter %4:type(s64)
+    %1:vfid(<4 x s32>) = OpFunctionParameter %4:type(s64)
+    %2:vfid(<4 x s32>) = OpFunctionParameter %4:type(s64)
+    OpName %5:iid(s64), 1701011814, 2003988326, 1600418401, 1953721961, 1651339107, 1600482921, 1634692198, 1717515380, 1702063201, 1970103903, 108
+    %10:_(s32) = G_FCONSTANT float 0.000000e+00
+    %13:_(s32) = G_FCONSTANT float -1.000000e+00
+    %12:_(<4 x s32>) = G_BUILD_VECTOR %13:_(s32), %13:_(s32), %13:_(s32), %13:_(s32)
+    %9:_(s32) = G_INTRINSIC intrinsic(@llvm.spv.fdot), %1:vfid(<4 x s32>), %2:vfid(<4 x s32>)
+    %11:_(s1) = G_FCMP floatpred(olt), %9:_(s32), %10:_
+    %14:_(<4 x s32>) = G_FMUL %0:vfid, %12:_
+    %15:id(<4 x s32>) = G_SELECT %11:_(s1), %0:vfid, %14:_
+    OpReturnValue %15:id(<4 x s32>)
+---
+name:            faceforward_instcombine_float4_ogt
+tracksRegLiveness: true
+legalized: true
+body:             |
+  bb.1.entry:
+    ; CHECK-LABEL: name: faceforward_instcombine_float4
+    ; CHECK-NOT: %10:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NOT: %9:_(s32) = G_INTRINSIC intrinsic(@llvm.spv.fdot), %1:vfid(<4 x s32>), %2:vfid(<4 x s32>)
+    ; CHECK-NOT: %11:_(s1) = G_FCMP floatpred(ogt), %10:_(s32), %9:_
+    ; CHECK-NOT: %12:_(<4 x s32>) = G_FNEG %0:vfid
+    ; CHECK-NOT: %13:id(<4 x s32>) =  G_SELECT %11:_(s1), %12:_, %0:vfid
+    ; CHECK: %11:id(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.spv.faceforward), %3(<4 x s32>), %4(<4 x s32>), %5(<4 x s32>)
+    ; CHECK: OpReturnValue %11(<4 x s32>)
+    %4:type(s64) = OpTypeVector %3:type(s64), 4
+    %6:type(s64) = OpTypeFunction %4:type(s64), %4:type(s64), %4:type(s64), %4:type(s64)
+    %3:type(s64) = OpTypeFloat 32
+    OpName %0:vfid(<4 x s32>), 97
+    OpName %1:vfid(<4 x s32>), 98
+    OpName %2:vfid(<4 x s32>), 99
+    %5:iid(s64) = OpFunction %4:type(s64), 0, %6:type(s64)
+    %0:vfid(<4 x s32>) = OpFunctionParameter %4:type(s64)
+    %1:vfid(<4 x s32>) = OpFunctionParameter %4:type(s64)
+    %2:vfid(<4 x s32>) = OpFunctionParameter %4:type(s64)
+    OpName %5:iid(s64), 1701011814, 2003988326, 1600418401, 1953721961, 1651339107, 1600482921, 1634692198, 1868510324, 29799
+    %10:_(s32) = G_FCONSTANT float 0.000000e+00
+    %9:_(s32) = G_INTRINSIC intrinsic(@llvm.spv.fdot), %1:vfid(<4 x s32>), %2:vfid(<4 x s32>)
+    %11:_(s1) = G_FCMP floatpred(ogt), %10:_(s32), %9:_
+    %12:_(<4 x s32>) = G_FNEG %0:vfid
+    %13:id(<4 x s32>) = G_SELECT %11:_(s1), %12:_, %0:vfid
+    OpReturnValue %13:id(<4 x s32>)
 
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/faceforward.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/faceforward.ll
@@ -77,7 +77,7 @@ entry:
   %spv.fdot = call float @llvm.spv.fdot.v4f32(<4 x float> %b, <4 x float> %c)
   %fcmp = fcmp olt float %spv.fdot, 0.000000e+00
   %fneg = fneg <4 x float> %a
-  %select = select i1 %fcmp, <4 x float> %a, <4 x float> %fneg
+  %select = select i1 %fcmp, <4 x float> %fneg, <4 x float> %a
   ret <4 x float> %select
  }
 
@@ -89,7 +89,7 @@ entry:
   ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]]
   ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] FaceForward %[[#]] %[[#arg1]] %[[#arg2]]
   %fmul = fmul float %b, %c
-  %fcmp = fcmp olt float %fmul, 0.000000e+00
+  %fcmp = fcmp olt float %fmul, -0.000000e+00
   %select = select i1 %fcmp, float 1.000000e+00, float -1.000000e+00
   ret float %select
 }
@@ -107,7 +107,48 @@ entry:
   ret <4 x float> %select
 }
 
-; The other fucntions are the test, but a entry point is required to have a valid SPIR-V module.
+define internal noundef <4 x float> @faceforward_instcombine_float4_splat(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] FaceForward %[[#]] %[[#arg1]] %[[#arg2]]
+  %spv.fdot = call float @llvm.spv.fdot.v4f32(<4 x float> %b, <4 x float> %c)
+  %fcmp = fcmp olt float %spv.fdot, 0.000000e+00
+  %select = select i1 %fcmp, <4 x float> splat (float 2.500000e+00), <4 x float> splat (float -2.500000e+00)
+  ret <4 x float> %select
+}
+
+define internal noundef <4 x float> @faceforward_instcombine_float4_false_fmul(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] FaceForward %[[#]] %[[#arg1]] %[[#arg2]]
+  %spv.fdot = call float @llvm.spv.fdot.v4f32(<4 x float> %b, <4 x float> %c)
+  %fcmp = fcmp olt float %spv.fdot, 0.000000e+00
+  %fneg = fmul <4 x float> %a, <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  %select = select i1 %fcmp, <4 x float> %a, <4 x float> %fneg
+  ret <4 x float> %select
+}
+
+define internal noundef <4 x float> @faceforward_instcombine_float4_ogt(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] FaceForward %[[#]] %[[#arg1]] %[[#arg2]]
+  %spv.fdot = call float @llvm.spv.fdot.v4f32(<4 x float> %b, <4 x float> %c)
+  %fcmp = fcmp ogt float 0.000000e+00, %spv.fdot
+  %fneg = fneg <4 x float> %a
+  %select = select i1 %fcmp, <4 x float> %fneg, <4 x float> %a
+  ret <4 x float> %select
+}
+
+; The other functions are the tests, but an entry point is required to have a valid SPIR-V module.
 define void @main() #1 {
 entry:
   ret void