Skip to content

Commit 0ffaec8

Browse files
committed
Do not create temp when a pointer is assigned to itself.
Allow lowering `pointer(:) = pointer(:) * scalar` without creating a temp.
1 parent 43f8eb1 commit 0ffaec8

File tree

2 files changed

+107
-7
lines changed

2 files changed

+107
-7
lines changed

flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -420,27 +420,38 @@ void ArrayCopyAnalysis::arrayMentions(
420420
loadMapSets.insert({load, visited});
421421
}
422422

423+
static bool hasPointerType(mlir::Type type) {
424+
if (auto boxTy = type.dyn_cast<BoxType>())
425+
type = boxTy.getEleTy();
426+
return type.isa<fir::PointerType>();
427+
}
428+
423429
/// Is there a conflict between the array value that was updated and to be
424430
/// stored to `st` and the set of arrays loaded (`reach`) and used to compute
425431
/// the updated value?
426432
static bool conflictOnLoad(llvm::ArrayRef<mlir::Operation *> reach,
427433
ArrayMergeStoreOp st) {
428434
mlir::Value load;
429-
auto addr = st.memref();
430-
auto stEleTy = dyn_cast_ptrOrBoxEleTy(addr.getType());
435+
mlir::Value addr = st.memref();
436+
const bool storeHasPointerType = hasPointerType(addr.getType());
437+
mlir::Type stEleTy =
438+
fir::unwrapSequenceType(fir::unwrapPassByRefType(addr.getType()));
431439
for (auto *op : reach)
432440
if (auto ld = mlir::dyn_cast<ArrayLoadOp>(op)) {
433-
auto ldTy = ld.memref().getType();
434-
if (auto boxTy = ldTy.dyn_cast<BoxType>())
435-
ldTy = boxTy.getEleTy();
436-
if (ldTy.isa<fir::PointerType>() && stEleTy == dyn_cast_ptrEleTy(ldTy))
437-
return true;
441+
mlir::Type ldTy = ld.memref().getType();
442+
mlir::Type ldEleTy =
443+
fir::unwrapSequenceType(fir::unwrapPassByRefType(ldTy));
438444
if (ld.memref() == addr) {
439445
if (ld.getResult() != st.original())
440446
return true;
441447
if (load)
448+
// TODO: only return if the loads may overlap (look at slices if any).
442449
return true;
443450
load = ld;
451+
} else if ((hasPointerType(ldTy) || storeHasPointerType) &&
452+
stEleTy == ldEleTy) {
453+
// TODO: Use target attribute to restrict this case further.
454+
return true;
444455
}
445456
}
446457
return false;
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Test array-value-copy pass (copy elision) with array assignment
2+
// involving Fortran pointers. Focus only on whether copy elision
3+
// is made or not.
4+
// RUN: fir-opt %s --array-value-copy | FileCheck %s
5+
6+
// Test `pointer(:) = array(:)`
7+
// TODO: array should have target attribute.
8+
// CHECK-LABEL: func @maybe_overlap
9+
// CHECK: fir.allocmem !fir.array<100xf32>
10+
func @maybe_overlap(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>) {
11+
%c100 = arith.constant 100 : index
12+
%c99 = arith.constant 99 : index
13+
%c1 = arith.constant 1 : index
14+
%c0 = arith.constant 0 : index
15+
%0 = fir.alloca f32
16+
%1 = fir.shape %c100 : (index) -> !fir.shape<1>
17+
%2 = fir.array_load %arg0(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
18+
%3 = fir.array_load %arg1(%1) : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
19+
%4 = fir.do_loop %arg2 = %c0 to %c99 step %c1 unordered iter_args(%arg3 = %2) -> (!fir.array<100xf32>) {
20+
%5 = fir.array_fetch %3, %arg2 : (!fir.array<100xf32>, index) -> f32
21+
%6 = fir.array_update %arg3, %5, %arg2 : (!fir.array<100xf32>, f32, index) -> !fir.array<100xf32>
22+
fir.result %6 : !fir.array<100xf32>
23+
}
24+
fir.array_merge_store %2, %4 to %arg0 : !fir.array<100xf32>, !fir.array<100xf32>, !fir.ptr<!fir.array<100xf32>>
25+
return
26+
}
27+
28+
// Test `pointer(:) = pointer(:)`
29+
// CHECK-LABEL: func @no_overlap
30+
// CHECK-NOT: fir.allocmem
31+
func @no_overlap(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>) {
32+
%c100 = arith.constant 100 : index
33+
%c99 = arith.constant 99 : index
34+
%c1 = arith.constant 1 : index
35+
%c0 = arith.constant 0 : index
36+
%0 = fir.alloca f32
37+
%1 = fir.shape %c100 : (index) -> !fir.shape<1>
38+
%2 = fir.array_load %arg0(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
39+
%3 = fir.do_loop %arg2 = %c0 to %c99 step %c1 unordered iter_args(%arg3 = %2) -> (!fir.array<100xf32>) {
40+
%4 = fir.array_fetch %2, %arg2 : (!fir.array<100xf32>, index) -> f32
41+
%5 = fir.array_update %arg3, %4, %arg2 : (!fir.array<100xf32>, f32, index) -> !fir.array<100xf32>
42+
fir.result %5 : !fir.array<100xf32>
43+
}
44+
fir.array_merge_store %2, %3 to %arg0 : !fir.array<100xf32>, !fir.array<100xf32>, !fir.ptr<!fir.array<100xf32>>
45+
return
46+
}
47+
48+
// Test `array(:) = pointer(:)`
49+
// TODO: array should have target attribute.
50+
// CHECK-LABEL: func @maybe_overlap_2
51+
// CHECK: fir.allocmem !fir.array<100xf32>
52+
func @maybe_overlap_2(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>) {
53+
%c100 = arith.constant 100 : index
54+
%c99 = arith.constant 99 : index
55+
%c1 = arith.constant 1 : index
56+
%c0 = arith.constant 0 : index
57+
%0 = fir.alloca f32
58+
%1 = fir.shape %c100 : (index) -> !fir.shape<1>
59+
%2 = fir.array_load %arg0(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
60+
%3 = fir.array_load %arg1(%1) : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
61+
%4 = fir.do_loop %arg2 = %c0 to %c99 step %c1 unordered iter_args(%arg3 = %3) -> (!fir.array<100xf32>) {
62+
%5 = fir.array_fetch %2, %arg2 : (!fir.array<100xf32>, index) -> f32
63+
%6 = fir.array_update %arg3, %5, %arg2 : (!fir.array<100xf32>, f32, index) -> !fir.array<100xf32>
64+
fir.result %6 : !fir.array<100xf32>
65+
}
66+
fir.array_merge_store %3, %4 to %arg1 : !fir.array<100xf32>, !fir.array<100xf32>, !fir.ref<!fir.array<100xf32>>
67+
return
68+
}
69+
70+
// Test `pointer1(:) = pointer2(:)`
71+
// CHECK-LABEL: func @maybe_overlap_3
72+
// CHECK: fir.allocmem !fir.array<100xf32>
73+
func @maybe_overlap_3(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ptr<!fir.array<100xf32>>) {
74+
%c100 = arith.constant 100 : index
75+
%c99 = arith.constant 99 : index
76+
%c1 = arith.constant 1 : index
77+
%c0 = arith.constant 0 : index
78+
%0 = fir.alloca f32
79+
%1 = fir.shape %c100 : (index) -> !fir.shape<1>
80+
%2 = fir.array_load %arg0(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
81+
%3 = fir.array_load %arg1(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
82+
%4 = fir.do_loop %arg2 = %c0 to %c99 step %c1 unordered iter_args(%arg3 = %3) -> (!fir.array<100xf32>) {
83+
%5 = fir.array_fetch %2, %arg2 : (!fir.array<100xf32>, index) -> f32
84+
%6 = fir.array_update %arg3, %5, %arg2 : (!fir.array<100xf32>, f32, index) -> !fir.array<100xf32>
85+
fir.result %6 : !fir.array<100xf32>
86+
}
87+
fir.array_merge_store %3, %4 to %arg1 : !fir.array<100xf32>, !fir.array<100xf32>, !fir.ptr<!fir.array<100xf32>>
88+
return
89+
}

0 commit comments

Comments
 (0)