folding_rules: generalise RedundantIAdd() to |^>><<+- with 0 on RHS and |^+ with 0 on LHS (KhronosGroup#6013)

nabijaczleweli · web-flow · commit fb803311a5b6 · 2025-03-05T15:49:02.000-05:00
All of OpBitwiseOr OpBitwiseXor OpShiftRightLogical OpShiftRightArithmetic OpShiftLeftLogical OpIAdd OpISub are no-ops with 0 on RHS, and all but the subs and shifts are no-ops with 0 on LHS. RedundantIAdd() already did this for OpIAdd, and it can be trivially generalised. Thus, this whole program reduces to a no-op: struct params_t { uint u; int i; float f; uint3 u3; int3 i3; float3 f3; }; RWStructuredBuffer<params_t> params; [numthreads(1, 1, 1)] void main() { params[0].u = params[0].u | 0; params[0].i = params[0].i | 0; params[0].u = params[0].u ^ 0; params[0].i = params[0].i ^ 0; params[0].u = params[0].u >> 0; params[0].i = params[0].i >> 0; params[0].u = params[0].u >> 0; params[0].i = params[0].i >> 0; params[0].u = params[0].u << 0; params[0].i = params[0].i << 0; params[0].u = params[0].u << 0; params[0].u3 = params[0].u3 | 0; params[0].i3 = params[0].i3 | 0; params[0].u3 = params[0].u3 ^ 0; params[0].i3 = params[0].i3 ^ 0; params[0].u3 = params[0].u3 >> 0; params[0].i3 = params[0].i3 >> 0; params[0].u3 = params[0].u3 >> 0; params[0].i3 = params[0].i3 >> 0; params[0].u3 = params[0].u3 << 0; params[0].i3 = params[0].i3 << 0; params[0].u3 = params[0].u3 << 0; params[0].u = params[0].u + 0; params[0].i = params[0].i + 0; params[0].f = params[0].f + 0; params[0].u = params[0].u - 0; params[0].i = params[0].i - 0; params[0].f = params[0].f - 0; params[0].u3 = params[0].u3 + 0; params[0].i3 = params[0].i3 + 0; params[0].f3 = params[0].f3 + 0; params[0].u3 = params[0].u3 - 0; params[0].i3 = params[0].i3 - 0; params[0].f3 = params[0].f3 - 0; params[0].u = 0 | params[0].u; params[0].i = 0 | params[0].i; params[0].u = 0 ^ params[0].u; params[0].i = 0 ^ params[0].i; params[0].u3 = 0 | params[0].u3; params[0].i3 = 0 | params[0].i3; params[0].u3 = 0 ^ params[0].u3; params[0].i3 = 0 ^ params[0].i3; params[0].u = 0 + params[0].u; params[0].i = 0 + params[0].i; params[0].f = 0 + params[0].f; params[0].u3 = 0 + params[0].u3; params[0].i3 = 0 + params[0].i3; params[0].f3 = 0 + params[0].f3; } Closes: google/shaderc#1484
diff --git a/source/opt/folding_rules.cpp b/source/opt/folding_rules.cpp
@@ -2507,24 +2507,17 @@ FoldingRule RedundantFMix() {
   };
 }
 
-// This rule handles addition of zero for integers.
-FoldingRule RedundantIAdd() {
-  return [](IRContext* context, Instruction* inst,
-            const std::vector<const analysis::Constant*>& constants) {
-    assert(inst->opcode() == spv::Op::OpIAdd &&
-           "Wrong opcode. Should be OpIAdd.");
+// Returns a folding rule that folds the instruction to the operand not being
+// checked if the operand that is checked is zero.
+FoldingRule RedundantBinaryOpWithZeroOperand(uint32_t arg) {
+  return [arg](IRContext* context, Instruction* inst,
+               const std::vector<const analysis::Constant*>& constants) {
+    assert(constants.size() == 2);
 
-    uint32_t operand = std::numeric_limits<uint32_t>::max();
-    const analysis::Type* operand_type = nullptr;
-    if (constants[0] && constants[0]->IsZero()) {
-      operand = inst->GetSingleWordInOperand(1);
-      operand_type = constants[0]->type();
-    } else if (constants[1] && constants[1]->IsZero()) {
-      operand = inst->GetSingleWordInOperand(0);
-      operand_type = constants[1]->type();
-    }
+    if (constants[arg] && constants[arg]->IsZero()) {
+      auto operand = inst->GetSingleWordInOperand(1 - arg);
+      auto operand_type = constants[arg]->type();
 
-    if (operand != std::numeric_limits<uint32_t>::max()) {
       const analysis::Type* inst_type =
           context->get_type_mgr()->GetType(inst->type_id());
       if (inst_type->IsSame(operand_type)) {
@@ -2539,6 +2532,38 @@ FoldingRule RedundantIAdd() {
   };
 }
 
+// This rule handles any of RedundantBinaryRhs0Ops with a 0 or vector 0 on the
+// right-hand side.
+static const constexpr spv::Op RedundantBinaryRhs0Ops[] = {
+    spv::Op::OpBitwiseOr,
+    spv::Op::OpBitwiseXor,
+    spv::Op::OpShiftRightLogical,
+    spv::Op::OpShiftRightArithmetic,
+    spv::Op::OpShiftLeftLogical,
+    spv::Op::OpIAdd,
+    spv::Op::OpISub};
+FoldingRule RedundantBinaryRhs0(spv::Op op) {
+  assert(std::find(std::begin(RedundantBinaryRhs0Ops),
+                   std::end(RedundantBinaryRhs0Ops),
+                   op) != std::end(RedundantBinaryRhs0Ops) &&
+         "Wrong opcode.");
+  (void)op;
+  return RedundantBinaryOpWithZeroOperand(1);
+}
+
+// This rule handles any of RedundantBinaryLhs0Ops with a 0 or vector 0 on the
+// left-hand side.
+static const constexpr spv::Op RedundantBinaryLhs0Ops[] = {
+    spv::Op::OpBitwiseOr, spv::Op::OpBitwiseXor, spv::Op::OpIAdd};
+FoldingRule RedundantBinaryLhs0(spv::Op op) {
+  assert(std::find(std::begin(RedundantBinaryLhs0Ops),
+                   std::end(RedundantBinaryLhs0Ops),
+                   op) != std::end(RedundantBinaryLhs0Ops) &&
+         "Wrong opcode.");
+  (void)op;
+  return RedundantBinaryOpWithZeroOperand(0);
+}
+
 // This rule look for a dot with a constant vector containing a single 1 and
 // the rest 0s.  This is the same as doing an extract.
 FoldingRule DotProductDoingExtract() {
@@ -2876,6 +2901,11 @@ void FoldingRules::AddFoldingRules() {
   // Note that the order in which rules are added to the list matters. If a rule
   // applies to the instruction, the rest of the rules will not be attempted.
   // Take that into consideration.
+  for (auto op : RedundantBinaryRhs0Ops)
+    rules_[op].push_back(RedundantBinaryRhs0(op));
+  for (auto op : RedundantBinaryLhs0Ops)
+    rules_[op].push_back(RedundantBinaryLhs0(op));
+
   rules_[spv::Op::OpBitcast].push_back(BitCastScalarOrVector());
 
   rules_[spv::Op::OpCompositeConstruct].push_back(
@@ -2921,7 +2951,6 @@ void FoldingRules::AddFoldingRules() {
   rules_[spv::Op::OpFSub].push_back(MergeSubAddArithmetic());
   rules_[spv::Op::OpFSub].push_back(MergeSubSubArithmetic());
 
-  rules_[spv::Op::OpIAdd].push_back(RedundantIAdd());
   rules_[spv::Op::OpIAdd].push_back(MergeAddNegateArithmetic());
   rules_[spv::Op::OpIAdd].push_back(MergeAddAddArithmetic());
   rules_[spv::Op::OpIAdd].push_back(MergeAddSubArithmetic());
diff --git a/test/opt/fold_test.cpp b/test/opt/fold_test.cpp
@@ -5054,7 +5054,57 @@ INSTANTIATE_TEST_SUITE_P(IntegerRedundantFoldingTest, GeneralInstructionFoldingT
             "OpReturn\n" +
             "OpFunctionEnd",
         2, 0),
-    // Test case 2: Fold n + 0
+    // Test case 2: Fold n | 0
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_uint Function\n" +
+            "%3 = OpLoad %uint %n\n" +
+            "%2 = OpBitwiseOr %uint %3 %uint_0\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 3: Fold n ^ 0
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_uint Function\n" +
+            "%3 = OpLoad %uint %n\n" +
+            "%2 = OpBitwiseXor %uint %3 %uint_0\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 4: Fold n >> 0
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_uint Function\n" +
+            "%3 = OpLoad %uint %n\n" +
+            "%2 = OpShiftRightLogical %uint %3 %uint_0\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 5: Fold n >> 0
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_uint Function\n" +
+            "%3 = OpLoad %uint %n\n" +
+            "%2 = OpShiftRightArithmetic %uint %3 %uint_0\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 6: Fold n << 0
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_uint Function\n" +
+            "%3 = OpLoad %uint %n\n" +
+            "%2 = OpShiftLeftLogical %uint %3 %uint_0\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 7: Fold n + 0
     InstructionFoldingCase<uint32_t>(
         Header() + "%main = OpFunction %void None %void_func\n" +
             "%main_lab = OpLabel\n" +
@@ -5064,7 +5114,37 @@ INSTANTIATE_TEST_SUITE_P(IntegerRedundantFoldingTest, GeneralInstructionFoldingT
             "OpReturn\n" +
             "OpFunctionEnd",
         2, 3),
-    // Test case 3: Fold 0 + n
+    // Test case 8: Fold n - 0
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_uint Function\n" +
+            "%3 = OpLoad %uint %n\n" +
+            "%2 = OpISub %uint %3 %uint_0\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 9: Fold 0 | n
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_uint Function\n" +
+            "%3 = OpLoad %uint %n\n" +
+            "%2 = OpBitwiseOr %uint %uint_0 %3\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 10: Fold 0 ^ n
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_uint Function\n" +
+            "%3 = OpLoad %uint %n\n" +
+            "%2 = OpBitwiseXor %uint %uint_0 %3\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 11: Fold 0 + n
     InstructionFoldingCase<uint32_t>(
         Header() + "%main = OpFunction %void None %void_func\n" +
             "%main_lab = OpLabel\n" +
@@ -5074,7 +5154,7 @@ INSTANTIATE_TEST_SUITE_P(IntegerRedundantFoldingTest, GeneralInstructionFoldingT
             "OpReturn\n" +
             "OpFunctionEnd",
         2, 3),
-    // Test case 4: Don't fold n + (1,0)
+    // Test case 12: Don't fold n + (1,0)
     InstructionFoldingCase<uint32_t>(
         Header() + "%main = OpFunction %void None %void_func\n" +
             "%main_lab = OpLabel\n" +
@@ -5084,7 +5164,7 @@ INSTANTIATE_TEST_SUITE_P(IntegerRedundantFoldingTest, GeneralInstructionFoldingT
             "OpReturn\n" +
             "OpFunctionEnd",
         2, 0),
-    // Test case 5: Don't fold (1,0) + n
+    // Test case 13: Don't fold (1,0) + n
     InstructionFoldingCase<uint32_t>(
         Header() + "%main = OpFunction %void None %void_func\n" +
             "%main_lab = OpLabel\n" +
@@ -5094,7 +5174,7 @@ INSTANTIATE_TEST_SUITE_P(IntegerRedundantFoldingTest, GeneralInstructionFoldingT
             "OpReturn\n" +
             "OpFunctionEnd",
         2, 0),
-    // Test case 6: Fold n + (0,0)
+    // Test case 14: Fold n + (0,0)
     InstructionFoldingCase<uint32_t>(
         Header() + "%main = OpFunction %void None %void_func\n" +
             "%main_lab = OpLabel\n" +
@@ -5104,7 +5184,7 @@ INSTANTIATE_TEST_SUITE_P(IntegerRedundantFoldingTest, GeneralInstructionFoldingT
             "OpReturn\n" +
             "OpFunctionEnd",
         2, 3),
-    // Test case 7: Fold (0,0) + n
+    // Test case 15: Fold (0,0) + n
     InstructionFoldingCase<uint32_t>(
         Header() + "%main = OpFunction %void None %void_func\n" +
             "%main_lab = OpLabel\n" +
@@ -5114,7 +5194,27 @@ INSTANTIATE_TEST_SUITE_P(IntegerRedundantFoldingTest, GeneralInstructionFoldingT
             "OpReturn\n" +
             "OpFunctionEnd",
         2, 3),
-    // Test case 8: Don't fold because of undefined value. Using 4294967295
+    // Test case 16: Fold n | (0,0)
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_v2int Function\n" +
+            "%3 = OpLoad %v2int %n\n" +
+            "%2 = OpBitwiseOr %v2int %3 %v2int_0_0\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 17: Fold (0,0) | n
+    InstructionFoldingCase<uint32_t>(
+        Header() + "%main = OpFunction %void None %void_func\n" +
+            "%main_lab = OpLabel\n" +
+            "%n = OpVariable %_ptr_v2int Function\n" +
+            "%3 = OpLoad %v2int %n\n" +
+            "%2 = OpBitwiseOr %v2int %v2int_0_0 %3\n" +
+            "OpReturn\n" +
+            "OpFunctionEnd",
+        2, 3),
+    // Test case 18: Don't fold because of undefined value. Using 4294967295
     // means that entry is undefined. We do not expect it to ever happen, so
     // not worth folding.
     InstructionFoldingCase<uint32_t>(
@@ -5126,7 +5226,7 @@ INSTANTIATE_TEST_SUITE_P(IntegerRedundantFoldingTest, GeneralInstructionFoldingT
             "OpReturn\n" +
             "OpFunctionEnd",
         2, 0),
-    // Test case 9: Don't fold because of undefined value. Using 4294967295
+    // Test case 19: Don't fold because of undefined value. Using 4294967295
     // means that entry is undefined. We do not expect it to ever happen, so
     // not worth folding.
     InstructionFoldingCase<uint32_t>(