newling
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/BUILD.bazel‎
Lines changed: 1 addition & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Common/BUILD.bazel‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/FoldSplitReductionAndWorkgroupMappingLoopsPass.cpp‎
Lines changed: 44 additions & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Common/FoldSplitReductionAndWorkgroupMappingLoopsPass.cpp‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/Passes.td‎
Lines changed: 10 additions & 1 deletion b/‎compiler/src/iree/compiler/Codegen/Common/Passes.td‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/ReconcileTranslationInfo.cpp‎
Lines changed: 20 additions & 4 deletions b/‎compiler/src/iree/compiler/Codegen/Common/ReconcileTranslationInfo.cpp‎
Lines changed: 20 additions & 4 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel‎
Lines changed: 3 additions & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt‎
Lines changed: 3 additions & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/test/fold_split_reduction_workgroup_mapping_loops.mlir‎
Lines changed: 88 additions & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Common/test/fold_split_reduction_workgroup_mapping_loops.mlir‎
Lines changed: 88 additions & 0 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir‎
Lines changed: 59 additions & 9 deletions b/‎compiler/src/iree/compiler/Codegen/Common/test/reconcile_translation_info.mlir‎
Lines changed: 59 additions & 9 deletions
@@ -124,6 +124,7 @@ iree_compiler_cc_library(
         "FlattenMemRefSubspanPass.cpp",
         "FlattenMemRefs.cpp",
         "FoldAffineMinInDistributedLoops.cpp",
+        "FoldSplitReductionAndWorkgroupMappingLoopsPass.cpp",
         "FoldTensorExtractOpPass.cpp",
         "FoldTensorSubsetIntoVectorTransferOps.cpp",
         "ForOpCanonicalizationPass.cpp",
 
@@ -116,6 +116,7 @@ iree_cc_library(
     "FlattenMemRefSubspanPass.cpp"
     "FlattenMemRefs.cpp"
     "FoldAffineMinInDistributedLoops.cpp"
+    "FoldSplitReductionAndWorkgroupMappingLoopsPass.cpp"
     "FoldTensorExtractOpPass.cpp"
     "FoldTensorSubsetIntoVectorTransferOps.cpp"
     "ForOpCanonicalizationPass.cpp"
 
@@ -0,0 +1,44 @@
+// Copyright 2025 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/Common/Passes.h"
+#include "iree/compiler/Codegen/Transforms/Transforms.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace mlir::iree_compiler {
+
+#define GEN_PASS_DEF_FOLDSPLITREDUCTIONANDWORKGROUPMAPPINGLOOPSPASS
+#include "iree/compiler/Codegen/Common/Passes.h.inc"
+
+namespace {
+
+/// Pass that greedily applies the patterns registered by
+/// `populateFoldSplitReductionAndWorkgroupMappingLoops` to the operation it is
+/// run on.
+struct FoldSplitReductionAndWorkgroupMappingLoopsPass
+    : impl::FoldSplitReductionAndWorkgroupMappingLoopsPassBase<
+          FoldSplitReductionAndWorkgroupMappingLoopsPass> {
+  // Reuse the constructors of the generated pass base class.
+  using Base::Base;
+
+  /// Applies the fold patterns and signals pass failure if the greedy rewrite
+  /// driver reports failure.
+  void runOnOperation() override;
+};
+
+/// Greedily applies the split-reduction / workgroup-mapping loop folding
+/// patterns to the operation this pass runs on. Emits an error on that
+/// operation and signals pass failure if the rewrite driver reports failure.
+void FoldSplitReductionAndWorkgroupMappingLoopsPass::runOnOperation() {
+  Operation *rootOp = getOperation();
+
+  RewritePatternSet foldPatterns(&getContext());
+  populateFoldSplitReductionAndWorkgroupMappingLoops(foldPatterns);
+
+  // Nothing more to do once the greedy driver reports success.
+  if (succeeded(applyPatternsGreedily(rootOp, std::move(foldPatterns)))) {
+    return;
+  }
+
+  rootOp->emitOpError(
+      "failed to apply pattern to fold split reduction loop with "
+      "workgroup for all");
+  signalPassFailure();
+}
+
+} // namespace
+
+} // namespace mlir::iree_compiler
@@ -286,7 +286,11 @@ def ReconcileTranslationInfoPass
             clEnumValN(IREE::Codegen::WorkgroupId::IdY, "y",
               "Constrain the workgroup distribution to use only workgroups along x and y."),
             clEnumValN(IREE::Codegen::WorkgroupId::IdZ, "z",
-              "Constrain the workgroup distribution to use only workgroups along x, y and z."))}]>
+              "Constrain the workgroup distribution to use only workgroups along x, y and z."))}]>,
+    Option<"foldSplitReductionLoopIntoWorkgroupMappingLoop",
+           "fold-split-reduction-loop-into-workgroup-mapping-loop",
+           "bool", /*default=*/"true",
+           "Resolve scf.forall loops created by split reduction by folding into workgroup mapping loop">
   ];
 }
 
@@ -382,6 +386,11 @@ def FoldReshapeIntoInterfaceTensorPass :
   let summary = "Folds reshape operations into the interface bindings.";
 }
 
+def FoldSplitReductionAndWorkgroupMappingLoopsPass :
+    Pass<"iree-codegen-fold-split-reduction-and-workgroup-mapping-loops", ""> {
+  let summary = "Folds `scf.forall` loops created by split reduction and workgroup mapping.";
+  let description = [{
+    Folds an `scf.forall` carrying `#iree_linalg_ext.split_reduction_mapping`
+    together with the `scf.forall` carrying `#iree_codegen.workgroup_mapping`
+    nested inside it into a single `scf.forall`. The split-reduction dimension
+    becomes the leading dimension of the folded loop and is assigned the next
+    workgroup mapping after those used by the nested loop (e.g. `y` when the
+    nested loop uses `x`).
+  }];
+}
+
 def FoldTensorExtractOpPass :
   Pass<"iree-codegen-fold-tensor-extract-op", ""> {
   let summary = "Fold `tensor.extract` operations prior to lowering to LLVM";
 
@@ -22,6 +22,7 @@
 #include "mlir/Analysis/CallGraph.h"
 #include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
 namespace mlir::iree_compiler {
 
@@ -470,9 +471,12 @@ resolveSplitReduceForAll(RewriterBase &rewriter, FunctionOpInterface funcOp,
   SmallVector<scf::ForallOp> splitReductionForAllOps;
   funcOp.walk([&splitReductionForAllOps](scf::ForallOp forAllOp) {
     auto mapping = forAllOp.getMapping();
-    if (!mapping || mapping->size() != 1 ||
-        !isa<IREE::LinalgExt::SplitReductionMappingAttr>(
-            mapping->getValue().front())) {
+    if (!mapping) {
+      return;
+    }
+    if (llvm::none_of(
+            mapping->getValue(),
+            llvm::IsaPred<IREE::LinalgExt::SplitReductionMappingAttr>)) {
       return;
     }
     splitReductionForAllOps.push_back(forAllOp);
@@ -619,6 +623,18 @@ getTargetFuncAttrs(IREE::Codegen::TranslationInfoAttr translationInfo) {
 void ReconcileTranslationInfoPass::runOnOperation() {
   auto variantOp = getOperation();
   auto innerModuleOp = variantOp.getInnerModule();
+  MLIRContext *context = &getContext();
+
+  if (foldSplitReductionLoopIntoWorkgroupMappingLoop) {
+    RewritePatternSet foldLoopPattern(context);
+    populateFoldSplitReductionAndWorkgroupMappingLoops(foldLoopPattern);
+    if (failed(
+            applyPatternsGreedily(innerModuleOp, std::move(foldLoopPattern)))) {
+      innerModuleOp.emitOpError(
+          "failed to fold split-reduction loop and workgroup mapping loop");
+      return signalPassFailure();
+    }
+  }
 
   // Get the symbol table of the inner module to lookup exported functions.
   SymbolTable symbolTable(innerModuleOp);
@@ -638,14 +654,14 @@ void ReconcileTranslationInfoPass::runOnOperation() {
       // Skip external functions.
       continue;
     }
+
     // Resolve workgroup distribution related `scf.forall` ops.
     if (failed(resolveWorkgroupForAll(rewriter, rootFuncOp, distributeAlong))) {
       variantOp.emitOpError(
           "failed to resolve workgroup distribution forall ops");
       return signalPassFailure();
     }
 
-    // Resolve split reduction distribution.
     if (failed(
             resolveSplitReduceForAll(rewriter, rootFuncOp, distributeAlong))) {
       variantOp.emitOpError("failed to resolve split reduction forall ops");
 
@@ -55,6 +55,7 @@ iree_lit_test_suite(
             "fold_affine_min_in_distributed_loops.mlir",
             "fold_affine_min_of_block_id.mlir",
             "fold_reshape_into_interface_tensor.mlir",
+            "fold_split_reduction_workgroup_mapping_loops.mlir",
             "fold_tensor_extract_op.mlir",
             "forop_canonicalization.mlir",
             "generic_vectorization.mlir",
@@ -92,7 +93,9 @@ iree_lit_test_suite(
             "propagate_dispatch_size_bounds.mlir",
             "propagate_reshapes_by_expansion.mlir",
             "reconcile_translation_info.mlir",
+            "reconcile_translation_info_legacy_resolve_split_reduction.mlir",
             "reconcile_translation_info_linearize.mlir",
+            "reconcile_translation_info_linearize_legacy_resolve_split_reduction.mlir",
             "reductions.mlir",
             "rematerialize_parallel_ops.mlir",
             "remove_dead_allocs.mlir",
 
@@ -50,6 +50,7 @@ iree_lit_test_suite(
     "fold_affine_min_in_distributed_loops.mlir"
     "fold_affine_min_of_block_id.mlir"
     "fold_reshape_into_interface_tensor.mlir"
+    "fold_split_reduction_workgroup_mapping_loops.mlir"
     "fold_tensor_extract_op.mlir"
     "forall_to_for.mlir"
     "forop_canonicalization.mlir"
@@ -88,7 +89,9 @@ iree_lit_test_suite(
     "propagate_dispatch_size_bounds.mlir"
     "propagate_reshapes_by_expansion.mlir"
     "reconcile_translation_info.mlir"
+    "reconcile_translation_info_legacy_resolve_split_reduction.mlir"
     "reconcile_translation_info_linearize.mlir"
+    "reconcile_translation_info_linearize_legacy_resolve_split_reduction.mlir"
     "reductions.mlir"
     "rematerialize_parallel_ops.mlir"
     "remove_dead_allocs.mlir"
 
@@ -0,0 +1,88 @@
+// RUN: iree-opt --iree-codegen-fold-split-reduction-and-workgroup-mapping-loops --split-input-file --mlir-print-local-scope --allow-unregistered-dialect %s | FileCheck %s
+
+// A split-reduction loop wrapping a 1-D workgroup loop mapped to `x`. The
+// expected result is a single 2-D `scf.forall` mapped to `[y, x]`, with the
+// split-reduction bounds and step in the leading position.
+func.func @simple_example_1dmapping(%0 : index, %1 : index, %2 : index, %3 : index,
+    %4 : index, %5 : index) {
+  scf.forall (%arg0) = (%0) to (%1) step (%2) {
+    "use1"(%arg0) : (index) -> ()
+    scf.forall (%arg1) = (%3) to (%4) step (%5) {
+      "use2"(%arg0, %arg1) : (index, index) -> ()
+    } {mapping = [#iree_codegen.workgroup_mapping<x>]}
+  } {mapping = [#iree_linalg_ext.split_reduction_mapping]}
+  return
+}
+//      CHECK: func @simple_example_1dmapping
+// CHECK-SAME:     %[[SPLIT_LB:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[SPLIT_UB:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[SPLIT_STEP:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[WG_LB:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[WG_UB:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[WG_STEP:[a-zA-Z0-9_]+]]: index
+//      CHECK:   scf.forall
+// CHECK-SAME:       %[[IV0:[a-zA-Z0-9]+]]
+// CHECK-SAME:       %[[IV1:[a-zA-Z0-9]+]]
+// CHECK-SAME:       = (%[[SPLIT_LB]], %[[WG_LB]])
+// CHECK-SAME:       to (%[[SPLIT_UB]], %[[WG_UB]])
+// CHECK-SAME:       step (%[[SPLIT_STEP]], %[[WG_STEP]])
+//      CHECK:     "use1"(%[[IV0]])
+//      CHECK:     "use2"(%[[IV0]], %[[IV1]])
+//      CHECK:     mapping = [#iree_codegen.workgroup_mapping<y>, #iree_codegen.workgroup_mapping<x>]
+
+// -----
+
+// A split-reduction loop wrapping a 2-D workgroup loop mapped to `[y, x]` and
+// written in the normalized `in (...)` form. The expected result is a single
+// 3-D `scf.forall` mapped to `[z, y, x]`, where the inner dimensions get lower
+// bound 0 and step 1.
+func.func @simple_example_2dmapping(%0 : index, %1 : index, %2 : index, %3 : index,
+    %4 : index) {
+  scf.forall (%arg0) = (%0) to (%1) step (%2) {
+    "use1"(%arg0) : (index) -> ()
+    scf.forall (%arg1, %arg2)  in (%3, %4) {
+      "use2"(%arg0, %arg1, %arg2) : (index, index, index) -> ()
+    } {mapping = [#iree_codegen.workgroup_mapping<y>, #iree_codegen.workgroup_mapping<x>]}
+  } {mapping = [#iree_linalg_ext.split_reduction_mapping]}
+  return
+}
+//      CHECK: func @simple_example_2dmapping
+// CHECK-SAME:     %[[SPLIT_LB:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[SPLIT_UB:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[SPLIT_STEP:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[WG_UB0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[WG_UB1:[a-zA-Z0-9_]+]]: index
+//      CHECK:   scf.forall
+// CHECK-SAME:       %[[IV0:[a-zA-Z0-9]+]]
+// CHECK-SAME:       %[[IV1:[a-zA-Z0-9]+]]
+// CHECK-SAME:       %[[IV2:[a-zA-Z0-9]+]]
+// CHECK-SAME:       = (%[[SPLIT_LB]], 0, 0)
+// CHECK-SAME:       to (%[[SPLIT_UB]], %[[WG_UB0]], %[[WG_UB1]])
+// CHECK-SAME:       step (%[[SPLIT_STEP]], 1, 1)
+//      CHECK:     "use1"(%[[IV0]])
+//      CHECK:     "use2"(%[[IV0]], %[[IV1]], %[[IV2]])
+//      CHECK:     mapping = [#iree_codegen.workgroup_mapping<z>, #iree_codegen.workgroup_mapping<y>, #iree_codegen.workgroup_mapping<x>]
+
+// -----
+
+// A split-reduction loop wrapping a 3-D workgroup loop that already uses
+// `[z, y, x]`. The expected result is a single 4-D `scf.forall` whose leading
+// (split-reduction) dimension is mapped to `z:1`. Unlike the cases above,
+// "use2" here does not use the split-reduction induction variable.
+func.func @simple_example_3dmapping(%0 : index, %1 : index, %2 : index, %3 : index,
+    %4 : index, %5 : index) {
+  scf.forall (%arg0) = (%0) to (%1) step (%2) {
+    "use1"(%arg0) : (index) -> ()
+    scf.forall (%arg1, %arg2, %arg3) in (%3, %4, %5) {
+      "use2"(%arg1, %arg2, %arg3) : (index, index, index) -> ()
+    } {mapping = [#iree_codegen.workgroup_mapping<z>, #iree_codegen.workgroup_mapping<y>, #iree_codegen.workgroup_mapping<x>]}
+  } {mapping = [#iree_linalg_ext.split_reduction_mapping]}
+  return
+}
+//      CHECK: func @simple_example_3dmapping
+// CHECK-SAME:     %[[SPLIT_LB:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[SPLIT_UB:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[SPLIT_STEP:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[ORIG_UB0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[ORIG_UB1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME:     %[[ORIG_UB2:[a-zA-Z0-9_]+]]: index
+//      CHECK:   scf.forall
+// CHECK-SAME:       %[[IV0:[a-zA-Z0-9]+]]
+// CHECK-SAME:       %[[IV1:[a-zA-Z0-9]+]]
+// CHECK-SAME:       %[[IV2:[a-zA-Z0-9]+]]
+// CHECK-SAME:       %[[IV3:[a-zA-Z0-9]+]]
+// CHECK-SAME:       = (%[[SPLIT_LB]], 0, 0, 0)
+// CHECK-SAME:       to (%[[SPLIT_UB]], %[[ORIG_UB0]], %[[ORIG_UB1]], %[[ORIG_UB2]])
+// CHECK-SAME:       step (%[[SPLIT_STEP]], 1, 1, 1)
+//      CHECK:     "use1"(%[[IV0]])
+//      CHECK:     "use2"(%[[IV1]], %[[IV2]], %[[IV3]])
+//      CHECK:     mapping = [#iree_codegen.workgroup_mapping<z:1>, #iree_codegen.workgroup_mapping<z>, #iree_codegen.workgroup_mapping<y>, #iree_codegen.workgroup_mapping<x>]
@@ -722,10 +722,9 @@ hal.executable private @split_reduction_executable {
     }
   }
 }
-//   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1, s2, s3, s4, s5] -> (((-s3 + s4) ceildiv s5) * ((s1 * s2) * s0))>
+//   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1, s2, s3, s4, s5] -> (((s1 * s2) * s0) * ((-s3 + s4) ceildiv s5))>
 //   CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>
-//   CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2, s3] -> (s0 floordiv ((-s1 + s2) ceildiv s3))>
-//   CHECK-DAG: #[[MAP3:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
+//   CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
 //       CHECK: @split_reduction_variant
 //       CHECK:   hal.executable.export
 //  CHECK-SAME:       %[[ARG1:[a-zA-Z0-9_]+]]: index
@@ -746,11 +745,62 @@ hal.executable private @split_reduction_executable {
 //   CHECK-DAG:     %[[ORIG_UB2:.+]] = hal.interface.constant.load {{.+}} ordinal(5)
 //   CHECK-DAG:     %[[SPLIT_NPROCS:.+]] = affine.apply #[[MAP1]]()[%[[SPLIT_LB]], %[[SPLIT_UB]], %[[SPLIT_STEP]]]
 //   CHECK-DAG:     %[[IDX:.+]] = hal.interface.workgroup.id[0]
+//       CHECK:     %[[DELINEARIZE:.+]]:4 = affine.delinearize_index %[[IDX]] into (%[[SPLIT_NPROCS]], %[[ORIG_UB0]], %[[ORIG_UB1]], %[[ORIG_UB2]])
+//       CHECK:     %[[SPLITIVREPLACEMENT:.+]] = affine.apply #[[MAP2]]()[%[[DELINEARIZE]]#0, %[[SPLIT_STEP]]]
+//       CHECK:     "use1"(%[[SPLITIVREPLACEMENT]])
+//       CHECK:     "use2"(%[[DELINEARIZE]]#1, %[[DELINEARIZE]]#2, %[[DELINEARIZE]]#3)
+
+// -----
+
+// Check that a split reduction loop that has no nested workgroup mapping loop
+// still gets resolved.
+
+#pipeline_layout = #hal.pipeline.layout<constants = 3, bindings = [
+    #hal.pipeline.binding<storage_buffer, "ReadOnly">,
+    #hal.pipeline.binding<storage_buffer>]>
+hal.executable private @only_split_reduction_executable {
+  hal.executable.variant public @only_split_reduction_variant target(#hal.executable.target<"", "", {}>) {
+    hal.executable.export public @only_split_reduction layout(#pipeline_layout) count(
+        %arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index) -> (index, index, index) {
+      %x, %y, %z = iree_tensor_ext.dispatch.workgroup_count_from_slice %arg1, %arg2, %arg3
+      %return_x, %return_y, %return_z =
+          iree_tensor_ext.dispatch.workgroup_count_split_reduction_modifier(%x, %y, %z), %arg1, %arg2, %arg3
+      hal.return %return_x, %return_y, %return_z : index, index, index
+    }
+    builtin.module {
+      func.func @only_split_reduction() {
+        %cst0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
+        %cst1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
+        %cst2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
+        %0 = iree_tensor_ext.dispatch.workload.ordinal %cst0, 0 : index
+        %1 = iree_tensor_ext.dispatch.workload.ordinal %cst1, 1 : index
+        %2 = iree_tensor_ext.dispatch.workload.ordinal %cst2, 2 : index
+        scf.forall (%arg0) = (%0) to (%1) step (%2) {
+          "use1"(%arg0) : (index) -> ()
+        } {mapping = [#iree_linalg_ext.split_reduction_mapping]}
+        return
+      }
+    }
+  }
+}
+//   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>
+//   CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2, s3] -> (s0 floordiv ((-s1 + s2) ceildiv s3))>
+//   CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
+//       CHECK: @only_split_reduction_variant
+//       CHECK:   hal.executable.export
+//  CHECK-SAME:       %[[ARG1:[a-zA-Z0-9_]+]]: index
+//  CHECK-SAME:       %[[ARG2:[a-zA-Z0-9_]+]]: index
+//  CHECK-SAME:       %[[ARG3:[a-zA-Z0-9_]+]]: index
+//   CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
+//   CHECK-DAG:     %[[NUMWORKGROUPSX:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]], %[[ARG2]], %[[ARG3]]]
+//       CHECK:     hal.return %[[NUMWORKGROUPSX]], %[[C1]], %[[C1]]
+//       CHECK:   func @only_split_reduction
+//   CHECK-DAG:     %[[SPLIT_LB:.+]] = hal.interface.constant.load {{.+}} ordinal(0)
+//   CHECK-DAG:     %[[SPLIT_UB:.+]] = hal.interface.constant.load {{.+}} ordinal(1)
+//   CHECK-DAG:     %[[SPLIT_STEP:.+]] = hal.interface.constant.load {{.+}} ordinal(2)
+//   CHECK-DAG:     %[[SPLIT_NPROCS:.+]] = affine.apply #[[MAP0]]()[%[[SPLIT_LB]], %[[SPLIT_UB]], %[[SPLIT_STEP]]]
+//   CHECK-DAG:     %[[IDX:.+]] = hal.interface.workgroup.id[0]
 //   CHECK-DAG:     %[[COUNTX:.+]] = hal.interface.workgroup.count[0]
-//   CHECK-DAG:     %[[ORIG_COUNTZ:.+]] = affine.apply #[[MAP2]]()[%[[COUNTX]], %[[SPLIT_LB]], %[[SPLIT_UB]], %[[SPLIT_STEP]]]
-//       CHECK:     %[[DELINEARIZE:.+]]:2 = affine.delinearize_index %[[IDX]] into (%[[SPLIT_NPROCS]], %[[ORIG_COUNTZ]]
-//       CHECK:     %[[SPLITIVREPLACEMENT:.+]] = affine.apply #[[MAP3]]()[%[[DELINEARIZE]]#0, %[[SPLIT_STEP]]]
+//   CHECK-DAG:     %[[ORIGCOUNTX:.+]] = affine.apply #[[MAP1]]()[%[[COUNTX]], %[[SPLIT_LB]], %[[SPLIT_UB]], %[[SPLIT_STEP]]]
+//       CHECK:     %[[DELINEARIZE:.+]]:2 = affine.delinearize_index %[[IDX]] into (%[[SPLIT_NPROCS]], %[[ORIGCOUNTX]])
+//       CHECK:     %[[SPLITIVREPLACEMENT:.+]] = affine.apply #[[MAP2]]()[%[[DELINEARIZE]]#0, %[[SPLIT_STEP]]]
 //       CHECK:     "use1"(%[[SPLITIVREPLACEMENT]])
-//       CHECK:     %[[OTHERIVREPLACEMENTS:.+]]:3 = affine.delinearize_index %[[DELINEARIZE]]#1
-//  CHECK-SAME:       into (%[[ORIG_UB0]], %[[ORIG_UB1]], %[[ORIG_UB2]]
-//       CHECK:     "use2"(%[[OTHERIVREPLACEMENTS]]#0, %[[OTHERIVREPLACEMENTS]]#1, %[[OTHERIVREPLACEMENTS]]#2)