Skip to content

Commit 6ba03f3

Browse files
committed
[AutoBump] Merge with fixes of d586372 (Jun 11)
2 parents a5e2e47 + d586372 commit 6ba03f3

File tree

6 files changed: 24 additions and 8 deletions

6 files changed

+24
-8
lines changed

mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,11 @@ struct BufferizationOptions {
309309
/// bufferized or not.
310310
bool bufferizeFunctionBoundaries = false;
311311

312+
// Specifies whether to account for parallel regions in RaW analysis. If true,
313+
// writes inside a parallel region that target a buffer defined outside of
314+
// that parallel region will be given a new buffer.
315+
bool checkParallelRegions = true;
316+
312317
/// Certain ops have aliasing OpOperand/OpResult invariants (e.g., scf.for).
313318
/// If this flag is set to `false`, those invariants are no longer enforced
314319
/// with buffer copies.

mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def OneShotBufferizeOp
8888
DefaultValuedAttr<BoolAttr, "false">:$dump_alias_sets,
8989
DefaultValuedAttr<BoolAttr, "false">:$test_analysis_only,
9090
DefaultValuedAttr<BoolAttr, "false">:$print_conflicts,
91+
DefaultValuedAttr<BoolAttr, "true">:$check_parallel_regions,
9192
DefaultValuedAttr<StrAttr, "\"memref.copy\"">:$memcpy_op);
9293

9394
let results = (outs TransformHandleTypeInterface:$transformed);

mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,8 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
501501
Option<"bufferizeBodilessFunctionResults", "bufferize-bodiless-function-results",
502502
"bool", /*default=*/"0",
503503
"Bufferize results of bodiless functions.">,
504+
Option<"checkParallelRegions", "check-parallel-regions", "bool",
505+
/*default=*/"true", "Account for parallel regions in RaW analysis.">,
504506
Option<"copyBeforeWrite", "copy-before-write", "bool", /*default=*/"false",
505507
"Skip the analysis. Make a buffer copy on every write.">,
506508
ListOption<"dialectFilter", "dialect-filter", "std::string",

mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ struct OneShotBufferizePass
227227
opt.testAnalysisOnly = testAnalysisOnly;
228228
opt.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
229229
opt.bufferizeBodilessFunctionResults = bufferizeBodilessFunctionResults;
230+
opt.checkParallelRegions = checkParallelRegions;
230231
opt.noAnalysisFuncFilter = noAnalysisFuncFilter;
231232

232233
// Configure type converter.

mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,7 @@ hasReadAfterWriteInterference(const DenseSet<OpOperand *> &usesRead,
611611
// Before going through the main RaW analysis, find cases where a buffer must
612612
// be privatized due to parallelism. If the result of a write is never read,
613613
// privatization is not necessary (and large parts of the IR are likely dead).
614-
if (!usesRead.empty()) {
614+
if (options.checkParallelRegions && !usesRead.empty()) {
615615
for (OpOperand *uConflictingWrite : usesWrite) {
616616
// Find the allocation point or last write (definition) of the buffer.
617617
// Note: In contrast to `findDefinitions`, this also returns results of

mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
1+
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s --check-prefixes=CHECK,PARALLEL-CHECK
2+
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only check-parallel-regions=false" -split-input-file | FileCheck %s --check-prefixes=CHECK,NO-PARALLEL-CHECK
23

34
// Run fuzzer with different seeds.
45
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
@@ -811,8 +812,10 @@ func.func @parallel_region() -> tensor<320xf32>
811812
%0 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %alloc0) -> (tensor<320xf32>) {
812813
%val = "test.foo"() : () -> (f32)
813814
// linalg.fill must bufferize out-of-place because every thread needs a
814-
// private copy of %alloc1.
815-
// CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
815+
// private copy of %alloc1. If not accounting for parallel regions, the fill
816+
// can bufferize in place.
817+
// PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
818+
// NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
816819
%fill = linalg.fill ins(%val : f32) outs(%alloc1 : tensor<1xf32>) -> tensor<1xf32>
817820
scf.forall.in_parallel {
818821
// CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -841,8 +844,10 @@ func.func @parallel_region_mixed_def(%c: i1) -> tensor<320xf32>
841844
}
842845
%val = "test.foo"() : () -> (f32)
843846
// linalg.fill must bufferize out-of-place because every thread needs a
844-
// private copy of %alloc1.
845-
// CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
847+
// private copy of %alloc1. If not accounting for parallel regions, the fill
848+
// can bufferize in place.
849+
// PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
850+
// NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
846851
%fill = linalg.fill ins(%val : f32) outs(%selected : tensor<1xf32>) -> tensor<1xf32>
847852
scf.forall.in_parallel {
848853
// CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -866,8 +871,10 @@ func.func @parallel_region_two_writes(%f: f32) -> tensor<320xf32>
866871
%0 = scf.forall (%arg0) in (%c320) shared_outs(%arg1 = %alloc0) -> (tensor<320xf32>) {
867872
%val = "test.foo"() : () -> (f32)
868873
// linalg.fill must bufferize out-of-place because every thread needs a
869-
// private copy of %alloc1.
870-
// CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
874+
// private copy of %alloc1. If not accounting for parallel regions, the fill
875+
// can bufferize in place.
876+
// PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
877+
// NO-PARALLEL-CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
871878
%fill = linalg.fill ins(%val : f32) outs(%alloc1 : tensor<1xf32>) -> tensor<1xf32>
872879
// CHECK: tensor.insert
873880
// CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]

0 commit comments

Comments
 (0)