improve instruction selection

VyacheslavLevytskyy · VyacheslavLevytskyy · commit 8cf4bd91d493 · 2024-12-13T08:30:45.000-08:00
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -61,6 +61,7 @@ class SPIRVInstructionSelector : public InstructionSelector {
   /// We need to keep track of the number we give to anonymous global values to
   /// generate the same name every time when this is needed.
   mutable DenseMap<const GlobalValue *, unsigned> UnnamedGlobalIDs;
+  SmallPtrSet<MachineInstr *, 8> DeadMIs;
 
 public:
   SPIRVInstructionSelector(const SPIRVTargetMachine &TM,
@@ -382,6 +383,24 @@ static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI);
 // Defined in SPIRVLegalizerInfo.cpp.
 extern bool isTypeFoldingSupported(unsigned Opcode);
 
+bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { // Reports whether MI may be treated as dead.
+  for (const auto &MO : MI.all_defs()) { // Every def of MI must be unused.
+    Register Reg = MO.getReg();
+    if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg)) // Physical-register def, or a def that still has non-debug uses.
+      return false;
+  }
+  if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE || MI.isFakeUse() ||
+      MI.isLifetimeMarker()) // These kinds are never reported dead, even when their defs are unused.
+    return false;
+  if (MI.isPHI()) // A PHI that passed the checks above is dead.
+    return true;
+  if (MI.mayStore() || MI.isCall() ||
+      (MI.mayLoad() && MI.hasOrderedMemoryRef()) || MI.isPosition() ||
+      MI.isDebugInstr() || MI.isTerminator() || MI.isJumpTableDebugInfo()) // Stores, calls, ordered loads, position/debug instructions and terminators are kept.
+    return false;
+  return true; // Anything else whose defs are all unused counts as dead.
+} // NOTE(review): non-static free function, so it has external linkage; consider `static` if it is only used in this file - confirm.
+
 bool SPIRVInstructionSelector::select(MachineInstr &I) {
   resetVRegsType(*I.getParent()->getParent());
 
@@ -404,8 +423,11 @@ bool SPIRVInstructionSelector::select(MachineInstr &I) {
           }
         });
         assert(Res || Def->getOpcode() == TargetOpcode::G_CONSTANT);
-        if (Res)
+        if (Res) {
+          if (!isTriviallyDead(*Def, *MRI) && isDead(*Def, *MRI))
+            DeadMIs.insert(Def);
           return Res;
+        }
       }
       MRI->setRegClass(SrcReg, MRI->getRegClass(DstReg));
       MRI->replaceRegWith(SrcReg, DstReg);
@@ -418,6 +440,15 @@ bool SPIRVInstructionSelector::select(MachineInstr &I) {
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   }
 
+  if (DeadMIs.contains(&I)) {
+    // If the instruction has already been made dead by folding it away,
+    // erase it.
+    LLVM_DEBUG(dbgs() << "Instruction is folded and dead.\n");
+    salvageDebugInfo(*MRI, I);
+    I.eraseFromParent();
+    return true;
+  }
+
   if (I.getNumOperands() != I.getNumExplicitOperands()) {
     LLVM_DEBUG(errs() << "Generic instr has unexpected implicit operands\n");
     return false;
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -228,8 +228,7 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
       .legalFor(allFloatScalarsAndVectors);
 
   getActionDefinitionsBuilder(G_STRICT_FLDEXP)
-      .legalForCartesianProduct(allFloatScalarsAndVectors,
-                                allFloatScalarsAndVectors);
+      .legalForCartesianProduct(allFloatScalarsAndVectors, allIntScalars);
 
   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
       .legalForCartesianProduct(allIntScalarsAndVectors,
diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-arithmetic.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-arithmetic.ll
@@ -1,81 +1,44 @@
-; Adapted from https://github.com/KhronosGroup/SPIRV-LLVM-Translator/tree/main/test/llvm-intrinsics
-
 ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
 ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
 
-; CHECK: OpName %[[#ad:]] "add"
-; CHECK: OpName %[[#di:]] "div"
-; CHECK: OpName %[[#su:]] "sub"
-; CHECK: OpName %[[#mu:]] "mul"
-
-; CHECK-NOT: OpDecorate %[[#]] FPRoundingMode
-
-; CHECK-DAG: OpDecorate %[[#ad]] FPRoundingMode 0
-; CHECK-DAG: OpDecorate %[[#di]] FPRoundingMode 1
-; CHECK-DAG: OpDecorate %[[#su]] FPRoundingMode 2
-; CHECK-DAG: OpDecorate %[[#mu]] FPRoundingMode 3
-
-; CHECK-NOT: OpDecorate %[[#]] FPRoundingMode
-
-; CHECK: OpFAdd %[[#]] %[[#ad]]
-; CHECK: OpFDiv %[[#]] %[[#di]]
-; CHECK: OpFSub %[[#]] %[[#su]]
-; CHECK: OpFMul %[[#]] %[[#mu]]
-; CHECK: OpExtInst %[[#]] %[[#]] %[[#]] fma
+; CHECK-DAG: OpName %[[#r1:]] "r1"
+; CHECK-DAG: OpName %[[#r2:]] "r2"
+; CHECK-DAG: OpName %[[#r3:]] "r3"
+; CHECK-DAG: OpName %[[#r4:]] "r4"
+; CHECK-DAG: OpName %[[#r5:]] "r5"
+; CHECK-DAG: OpName %[[#r6:]] "r6"
+
+; CHECK-NOT: OpDecorate %[[#r5]] FPRoundingMode
+; CHECK-NOT: OpDecorate %[[#r6]] FPRoundingMode
+
+; CHECK-DAG: OpDecorate %[[#r1]] FPRoundingMode RTE
+; CHECK-DAG: OpDecorate %[[#r2]] FPRoundingMode RTZ
+; CHECK-DAG: OpDecorate %[[#r4]] FPRoundingMode RTN
+; CHECK-DAG: OpDecorate %[[#r3]] FPRoundingMode RTP
+
+; CHECK: OpFAdd %[[#]] %[[#]]
+; CHECK: OpFDiv %[[#]] %[[#]]
+; CHECK: OpFSub %[[#]] %[[#]]
+; CHECK: OpFMul %[[#]] %[[#]]
+; CHECK: OpExtInst %[[#]] %[[#]] fma %[[#]] %[[#]] %[[#]]
 ; CHECK: OpFRem
 
 ; Function Attrs: norecurse nounwind strictfp
-define dso_local spir_kernel void @test(float %a, i32 %in, i32 %ui) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_buffer_location !9 {
+define dso_local spir_kernel void @test(float %a, i32 %in, i32 %ui) {
 entry:
-  %add = tail call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #2
-  %add2 = fadd float %a, %a
-;  %div = tail call float @llvm.experimental.constrained.fdiv.f32(float %a, float %a, metadata !"round.towardzero", metadata !"fpexcept.strict") #2, !fpmath !10
-;  %sub = tail call float @llvm.experimental.constrained.fsub.f32(float %a, float %a, metadata !"round.upward", metadata !"fpexcept.strict") #2
-;  %mul = tail call float @llvm.experimental.constrained.fmul.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.strict") #2
-;  %r1 = tail call float @llvm.experimental.constrained.fma.f32(float %a, float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #2
-;  %r2 = tail call float @llvm.experimental.constrained.frem.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #2
+  %r1 = tail call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  %r2 = tail call float @llvm.experimental.constrained.fdiv.f32(float %a, float %a, metadata !"round.towardzero", metadata !"fpexcept.strict")
+  %r3 = tail call float @llvm.experimental.constrained.fsub.f32(float %a, float %a, metadata !"round.upward", metadata !"fpexcept.strict")
+  %r4 = tail call float @llvm.experimental.constrained.fmul.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.strict")
+  %r5 = tail call float @llvm.experimental.constrained.fma.f32(float %a, float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %r6 = tail call float @llvm.experimental.constrained.frem.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret void
 }
 
-; Function Attrs: inaccessiblememonly nounwind willreturn
-declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1
-
-; Function Attrs: inaccessiblememonly nounwind willreturn
-declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) #1
-
-; Function Attrs: inaccessiblememonly nounwind willreturn
-declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1
-
-; Function Attrs: inaccessiblememonly nounwind willreturn
-declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) #1
-
-; Function Attrs: inaccessiblememonly nounwind willreturn
-declare float @llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata) #1
-
-; Function Attrs: inaccessiblememonly nounwind willreturn
-declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1
-
-; Function Attrs: inaccessiblememonly nounwind willreturn
-declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) #1
-
-attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { inaccessiblememonly nounwind willreturn }
-attributes #2 = { strictfp }
-
-!llvm.module.flags = !{!0}
-!opencl.ocl.version = !{!1}
-!opencl.spir.version = !{!2, !2}
-!spirv.Source = !{!3}
-!llvm.ident = !{!4}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 1, i32 2}
-!3 = !{i32 4, i32 100000}
-!4 = !{!"clang version 12.0.0 (https://github.com/c199914007/llvm.git f0c85a8adeb49638c01eee1451aa9b35462cbfd5)"}
-!5 = !{i32 0, i32 0, i32 0}
-!6 = !{!"none", !"none", !"none"}
-!7 = !{!"float", !"int", !"uint"}
-!8 = !{!"", !"", !""}
-!9 = !{i32 -1, i32 -1, i32 -1}
-!10 = !{float 2.500000e+00}
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata)