
Commit c938b03

[Codegen][PCF] Add convertForallToGenericNest transform
Add a transform that converts scf.forall operations into multi-level pcf.generic nests. This is required when a single scf.forall mapping type needs to map to multiple scopes; the immediate case is converting thread-mapped scf.forall ops to combined subgroup + lane scopes.

In that case we cannot simply convert to pcf.loop because of how the automatic redistribution in pcf.loop's lowering works: the iterations of the scf.forall need to be redistributed to *all* workers across both scopes. Making either of the two scopes (subgroup or lane) a pcf.loop therefore produces the wrong IR structure: looping over subgroups fails to predicate the lanes, while looping over lanes introduces more thread divergence than normal (normally only a single subgroup may exhibit divergence).

The transform instead creates an outer loop over workers and an inner scf.forall over the per-worker iteration range. For multi-dimensional cases, affine.linearize_index and affine.delinearize_index are used to flatten/unflatten indices appropriately.

Additionally, adds a new method, getNativeNumProcessorIds, to ScopeAttrInterface; this is needed to query the number of ids to generate.
1 parent: ab71aab

29 files changed: +1222 −334 lines
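The core of what the generated nest emits is the per-worker bounds computation described in the commit message above and verified by the new lit test below. A minimal sketch in MLIR C++, assuming a hypothetical helper named computeWorkerRange (illustrative only, not code from this commit):

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"

namespace {
// Half-open range [start, end) of forall iterations handled by one worker.
struct WorkerRange {
  mlir::Value start;
  mlir::Value end;
};

// Hypothetical helper: given a worker `id`, the worker `count`, and the
// `total` iteration count, compute the chunk this worker owns. Mirrors the
// arith.ceildivui / muli / addi / minui sequence checked in the lit test.
WorkerRange computeWorkerRange(mlir::OpBuilder &b, mlir::Location loc,
                               mlir::Value id, mlir::Value count,
                               mlir::Value total) {
  // chunk = ceildiv(total, count)
  mlir::Value chunk = b.create<mlir::arith::CeilDivUIOp>(loc, total, count);
  // start = id * chunk
  mlir::Value start = b.create<mlir::arith::MulIOp>(loc, id, chunk);
  // end = min(start + chunk, total)
  mlir::Value rawEnd = b.create<mlir::arith::AddIOp>(loc, start, chunk);
  mlir::Value end = b.create<mlir::arith::MinUIOp>(loc, rawEnd, total);
  return {start, end};
}
} // namespace

The inner scf.forall then iterates over [start, end) for each worker, keeping every worker across both scopes busy without the predication and divergence problems described above.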

compiler/src/iree/compiler/Codegen/Common/BUILD.bazel

Lines changed: 1 addition & 0 deletions
@@ -161,6 +161,7 @@ iree_compiler_cc_library(
         "TensorToVectorVectorizePad.cpp",
         "TestExecutablePreprocessing.cpp",
         "TestPartitionableLoopsInterface.cpp",
+        "ConvertForallToGenericNestWorkgroup.cpp",
         "TileAndDistributeToWorkgroupsPass.cpp",
         "TileAndFuseUtils.cpp",
         "TileDispatchUsingForall.cpp",

compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -154,6 +154,7 @@ iree_cc_library(
         "TensorToVectorVectorizePad.cpp"
         "TestExecutablePreprocessing.cpp"
         "TestPartitionableLoopsInterface.cpp"
+        "ConvertForallToGenericNestWorkgroup.cpp"
         "TileAndDistributeToWorkgroupsPass.cpp"
         "TileAndFuseUtils.cpp"
         "TileDispatchUsingForall.cpp"
compiler/src/iree/compiler/Codegen/Common/ConvertForallToGenericNestWorkgroup.cpp

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
// Copyright 2026 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree/compiler/Codegen/Common/Passes.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "iree/compiler/Codegen/Dialect/PCF/IR/PCF.h"
#include "iree/compiler/Codegen/Dialect/PCF/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/PatternMatch.h"

namespace mlir::iree_compiler {

#define GEN_PASS_DEF_CONVERTFORALLTOGENERICNESTWORKGROUPPASS
#include "iree/compiler/Codegen/Common/Passes.h.inc"

namespace {

/// Returns true if the forall op has WorkgroupMappingAttr mapping attributes.
static bool hasWorkgroupMapping(scf::ForallOp forallOp) {
  std::optional<ArrayAttr> mapping = forallOp.getMapping();
  if (!mapping || mapping->empty()) {
    return false;
  }
  return llvm::all_of(mapping.value(),
                      llvm::IsaPred<IREE::Codegen::WorkgroupMappingAttr>);
}

struct ConvertForallToGenericNestWorkgroupPass final
    : public impl::ConvertForallToGenericNestWorkgroupPassBase<
          ConvertForallToGenericNestWorkgroupPass> {
  using Base::Base;

  void runOnOperation() override {
    MLIRContext *ctx = &getContext();

    // Always use linearized workgroup scope (1 id).
    // Interface is implemented via external models hence the cast.
    auto scope = cast<IREE::PCF::ScopeAttrInterface>(
        IREE::Codegen::WorkgroupScopeAttr::get(ctx, /*linearize=*/true));

    SmallVector<IREE::PCF::ScopeAttrInterface> scopes = {scope};

    IRRewriter rewriter(ctx);
    SmallVector<scf::ForallOp> forallOps;
    getOperation()->walk([&](scf::ForallOp forallOp) {
      // Only convert foralls with workgroup mapping attributes.
      if (hasWorkgroupMapping(forallOp)) {
        forallOps.push_back(forallOp);
      }
    });

    for (scf::ForallOp forallOp : forallOps) {
      rewriter.setInsertionPoint(forallOp);
      FailureOr<IREE::PCF::GenericOp> result =
          IREE::PCF::convertForallToGenericNest(rewriter, forallOp, scopes);
      if (failed(result)) {
        forallOp.emitError("failed to convert forall to generic nest");
        return signalPassFailure();
      }
      // Replace forall results with generic results.
      rewriter.replaceOp(forallOp, result->getResults());
    }
  }
};

} // namespace
} // namespace mlir::iree_compiler

compiler/src/iree/compiler/Codegen/Common/ConvertWorkgroupForallToPCF.cpp

Lines changed: 2 additions & 1 deletion
@@ -53,7 +53,8 @@ ConvertWorkgroupForall::matchAndRewrite(scf::ForallOp op,
   auto scope = cast<IREE::PCF::ScopeAttrInterface>(
       IREE::Codegen::WorkgroupScopeAttr::get(rewriter.getContext(),
                                              /*linearize=*/true));
-  FailureOr<IREE::PCF::LoopOp> res = convertForallToPCF(rewriter, op, scope, 1);
+  FailureOr<IREE::PCF::LoopOp> res =
+      convertForallToPCFLoop(rewriter, op, scope, 1);
   if (failed(res)) {
     return failure();
   }

compiler/src/iree/compiler/Codegen/Common/Passes.h

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
 #include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Transform/IR/TransformOps.h"

compiler/src/iree/compiler/Codegen/Common/Passes.td

Lines changed: 17 additions & 0 deletions
@@ -155,6 +155,23 @@ def ConvertWorkgroupForallToPCFPass
   let dependentDialects = ["iree_compiler::IREE::PCF::PCFDialect"];
 }

+def ConvertForallToGenericNestWorkgroupPass
+    : InterfacePass<"iree-codegen-convert-forall-to-generic-nest-workgroup",
+                    "mlir::FunctionOpInterface"> {
+  let summary = "Converts scf.forall ops with workgroup mapping to pcf.generic";
+  let description = [{
+    Converts `scf.forall` ops with `#iree_codegen.workgroup_mapping` attributes
+    to a `pcf.generic` op using workgroup scope. The pass always linearizes
+    workgroup IDs to a single dimension.
+  }];
+  let dependentDialects = [
+    "iree_compiler::IREE::PCF::PCFDialect",
+    "arith::ArithDialect",
+    "affine::AffineDialect",
+    "scf::SCFDialect"
+  ];
+}
+
 def CombineLayoutTransformationPass :
     InterfacePass<"iree-codegen-combine-layout-transformation", "mlir::FunctionOpInterface"> {
   let summary =

compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ iree_lit_test_suite(
         "convert_accgemm_to_gemm.mlir",
         "convert_bf16_arith_to_f32.mlir",
         "convert_bf16_to_uint16_buffers.mlir",
+        "convert_forall_to_generic_nest_workgroup.mlir",
         "convert_hal_descriptor_type_to_gpu_address_space.mlir",
         "convert_to_destination_passing_style.mlir",
         "convert_unsupported_float_arith.mlir",

compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ iree_lit_test_suite(
         "convert_accgemm_to_gemm.mlir"
         "convert_bf16_arith_to_f32.mlir"
         "convert_bf16_to_uint16_buffers.mlir"
+        "convert_forall_to_generic_nest_workgroup.mlir"
         "convert_hal_descriptor_type_to_gpu_address_space.mlir"
         "convert_to_destination_passing_style.mlir"
         "convert_unsupported_float_arith.mlir"
compiler/src/iree/compiler/Codegen/Common/test/convert_forall_to_generic_nest_workgroup.mlir

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
// RUN: iree-opt %s --pass-pipeline="builtin.module(func.func(iree-codegen-convert-forall-to-generic-nest-workgroup))" --allow-unregistered-dialect --split-input-file | FileCheck %s

// Test that workgroup scope creates 1 id/count pair with linearized scope.

// CHECK-LABEL: func.func @test_workgroup_scope
// CHECK: pcf.generic
// CHECK-SAME: scope(#iree_codegen.workgroup_scope<linearize>)
// CHECK: execute(%{{.*}})[%[[ID:.+]]: index, %[[COUNT:.+]]: index]
// Chunk size computed from total iterations / worker count.
// CHECK: %[[CHUNK:.+]] = arith.ceildivui
// Start = id * chunk_size.
// CHECK: %[[START:.+]] = arith.muli %[[ID]], %[[CHUNK]]
// End = min(start + chunk_size, total).
// CHECK: %[[END_RAW:.+]] = arith.addi %[[START]], %[[CHUNK]]
// CHECK: %[[END:.+]] = arith.minui %[[END_RAW]]
// CHECK: scf.forall (%[[IV:.+]]) = (%[[START]]) to (%[[END]])
// CHECK: "foo.body"(%[[IV]])
// CHECK: pcf.write_slice
// CHECK: pcf.return
func.func @test_workgroup_scope(%init: tensor<64xf32>) -> tensor<64xf32> {
  %result = scf.forall (%i) in (64) shared_outs(%out = %init) -> tensor<64xf32> {
    "foo.body"(%i) : (index) -> ()
    %slice = tensor.extract_slice %out[%i] [1] [1] : tensor<64xf32> to tensor<1xf32>
    scf.forall.in_parallel {
      tensor.parallel_insert_slice %slice into %out[%i] [1] [1] : tensor<1xf32> into tensor<64xf32>
    }
  } {mapping = [#iree_codegen.workgroup_mapping<x>]}
  return %result : tensor<64xf32>
}
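As a concrete instance of the bounds math checked above, take the 64 iterations in this test and a hypothetical workgroup count of 4: chunk = ceildiv(64, 4) = 16, so the workgroup with id 2 computes start = 2 * 16 = 32 and end = min(32 + 16, 64) = 48, i.e. its inner scf.forall covers iterations [32, 48).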

compiler/src/iree/compiler/Codegen/Dialect/GPU/ExternalInterfaces/GPUScopeExternalModels.cpp

Lines changed: 10 additions & 0 deletions
@@ -83,6 +83,11 @@ struct SubgroupScopeModel
       MLIRContext *context) const {
     return gpu::AddressSpaceAttr::get(context, gpu::AddressSpace::Workgroup);
   }
+
+  int64_t getNativeNumProcessorIds(Attribute attr) const {
+    // SubgroupScope natively provides a single 1D processor ID (subgroup_id).
+    return 1;
+  }
 };

 /// External model for LaneScopeAttr implementing ScopeAttrInterface.
@@ -133,6 +138,11 @@ struct LaneScopeModel
     // logic to allocate + subview.
     return failure();
   }
+
+  int64_t getNativeNumProcessorIds(Attribute attr) const {
+    // LaneScope natively provides a single 1D processor ID (lane_id).
+    return 1;
+  }
 };

 } // namespace
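For context, a caller-side sketch of the new hook, assuming the interface method is invoked on a ScopeAttrInterface value (the helper name numProcessorIdsFor is hypothetical and not part of this commit):

#include "iree/compiler/Codegen/Dialect/PCF/IR/PCF.h"

namespace mlir::iree_compiler {
// Hypothetical helper: the nest builder asks each scope how many processor id
// block arguments it should materialize. Subgroup and lane scopes report 1
// (subgroup_id / lane_id), so multi-dimensional foralls are handled with
// affine.linearize_index / affine.delinearize_index instead of one id per
// dimension.
static int64_t numProcessorIdsFor(IREE::PCF::ScopeAttrInterface scope) {
  return scope.getNativeNumProcessorIds();
}
} // namespace mlir::iree_compiler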
