Skip to content

Commit 3f3f002

Browse files
committed
Merge remote-tracking branch 'origin/main' into vplan-resume-phi-using-PHI
2 parents d3df2c3 + 9ea4924 commit 3f3f002

File tree

23 files changed

+504
-63
lines changed

23 files changed

+504
-63
lines changed

clang/lib/AST/Expr.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,8 +1470,8 @@ static unsigned SizeOfCallExprInstance(Expr::StmtClass SC) {
14701470

14711471
// changing the size of SourceLocation, CallExpr, and
14721472
// subclasses requires careful considerations
1473-
static_assert(sizeof(SourceLocation) == 4 && sizeof(CXXOperatorCallExpr) == 32,
1474-
"we assume CXXOperatorCallExpr is 32 bytes");
1473+
static_assert(sizeof(SourceLocation) == 4 && sizeof(CXXOperatorCallExpr) <= 32,
1474+
"we assume CXXOperatorCallExpr is at most 32 bytes");
14751475

14761476
CallExpr::CallExpr(StmtClass SC, Expr *Fn, ArrayRef<Expr *> PreArgs,
14771477
ArrayRef<Expr *> Args, QualType Ty, ExprValueKind VK,

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "clang/Basic/TargetInfo.h"
3030
#include "clang/Frontend/FrontendDiagnostic.h"
3131
#include "llvm/IR/InlineAsm.h"
32+
#include "llvm/IR/Instruction.h"
3233
#include "llvm/IR/Intrinsics.h"
3334
#include "llvm/IR/IntrinsicsX86.h"
3435
#include "llvm/IR/MatrixBuilder.h"
@@ -4190,6 +4191,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
41904191
Matrix, Dst.emitRawPointer(*this),
41914192
Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
41924193
MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4194+
addInstToNewSourceAtom(cast<Instruction>(Result), Matrix);
41934195
return RValue::get(Result);
41944196
}
41954197

@@ -4350,7 +4352,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
43504352
Value *SizeVal = EmitScalarExpr(E->getArg(1));
43514353
EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
43524354
E->getArg(0)->getExprLoc(), FD, 0);
4353-
Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4355+
auto *I = Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4356+
addInstToNewSourceAtom(I, nullptr);
43544357
return RValue::get(nullptr);
43554358
}
43564359

@@ -4365,7 +4368,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
43654368
EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
43664369
E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
43674370
0);
4368-
Builder.CreateMemMove(Dest, Src, SizeVal, false);
4371+
auto *I = Builder.CreateMemMove(Dest, Src, SizeVal, false);
4372+
addInstToNewSourceAtom(I, nullptr);
43694373
return RValue::get(nullptr);
43704374
}
43714375

@@ -4378,7 +4382,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
43784382
Value *SizeVal = EmitScalarExpr(E->getArg(2));
43794383
EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
43804384
EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4381-
Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4385+
auto *I = Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4386+
addInstToNewSourceAtom(I, nullptr);
43824387
if (BuiltinID == Builtin::BImempcpy ||
43834388
BuiltinID == Builtin::BI__builtin_mempcpy)
43844389
return RValue::get(Builder.CreateInBoundsGEP(
@@ -4394,7 +4399,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
43944399
E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
43954400
EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
43964401
EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4397-
Builder.CreateMemCpyInline(Dest, Src, Size);
4402+
auto *I = Builder.CreateMemCpyInline(Dest, Src, Size);
4403+
addInstToNewSourceAtom(I, nullptr);
43984404
return RValue::get(nullptr);
43994405
}
44004406

@@ -4415,7 +4421,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
44154421
Address Dest = EmitPointerWithAlignment(E->getArg(0));
44164422
Address Src = EmitPointerWithAlignment(E->getArg(1));
44174423
Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4418-
Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4424+
auto *I = Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4425+
addInstToNewSourceAtom(I, nullptr);
44194426
return RValue::get(Dest, *this);
44204427
}
44214428

@@ -4441,7 +4448,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
44414448
Address Dest = EmitPointerWithAlignment(E->getArg(0));
44424449
Address Src = EmitPointerWithAlignment(E->getArg(1));
44434450
Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4444-
Builder.CreateMemMove(Dest, Src, SizeVal, false);
4451+
auto *I = Builder.CreateMemMove(Dest, Src, SizeVal, false);
4452+
addInstToNewSourceAtom(I, nullptr);
44454453
return RValue::get(Dest, *this);
44464454
}
44474455

@@ -4461,7 +4469,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
44614469
.getQuantity()));
44624470
EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
44634471
EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4464-
Builder.CreateMemMove(Dest, Src, SizeVal, false);
4472+
auto *I = Builder.CreateMemMove(Dest, Src, SizeVal, false);
4473+
addInstToNewSourceAtom(I, nullptr);
44654474
return RValue::get(Dest, *this);
44664475
}
44674476
case Builtin::BImemset:
@@ -4472,7 +4481,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
44724481
Value *SizeVal = EmitScalarExpr(E->getArg(2));
44734482
EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
44744483
E->getArg(0)->getExprLoc(), FD, 0);
4475-
Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4484+
auto *I = Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4485+
addInstToNewSourceAtom(I, ByteVal);
44764486
return RValue::get(Dest, *this);
44774487
}
44784488
case Builtin::BI__builtin_memset_inline: {
@@ -4484,7 +4494,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
44844494
EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
44854495
E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
44864496
0);
4487-
Builder.CreateMemSetInline(Dest, ByteVal, Size);
4497+
auto *I = Builder.CreateMemSetInline(Dest, ByteVal, Size);
4498+
addInstToNewSourceAtom(I, nullptr);
44884499
return RValue::get(nullptr);
44894500
}
44904501
case Builtin::BI__builtin___memset_chk: {
@@ -4501,7 +4512,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
45014512
Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
45024513
Builder.getInt8Ty());
45034514
Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4504-
Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4515+
auto *I = Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4516+
addInstToNewSourceAtom(I, nullptr);
45054517
return RValue::get(Dest, *this);
45064518
}
45074519
case Builtin::BI__builtin_wmemchr: {
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
2+
// RUN: %clang_cc1 -triple x86_64-linux-gnu -gkey-instructions -x c++ %s -debug-info-kind=line-tables-only -gno-column-info -emit-llvm -o - -ftrivial-auto-var-init=zero -fenable-matrix -disable-llvm-passes \
3+
// RUN: | FileCheck %s --implicit-check-not atomGroup --implicit-check-not atomRank
4+
5+
// RUN: %clang_cc1 -triple x86_64-linux-gnu -gkey-instructions -x c %s -debug-info-kind=line-tables-only -gno-column-info -emit-llvm -o - -ftrivial-auto-var-init=zero -fenable-matrix -disable-llvm-passes \
6+
// RUN: | FileCheck %s --implicit-check-not atomGroup --implicit-check-not atomRank
7+
8+
typedef float m2x2 __attribute__((matrix_type(2, 2)));
9+
m2x2 mat;
10+
float f4[4];
11+
float f8[8];
12+
int v = 3;
13+
14+
void fun() {
15+
// CHECK: %a = alloca ptr, align 8
16+
// CHECK: %0 = alloca i8, i64 4{{.*}}, !dbg [[G1R2:!.*]]
17+
// CHECK: call void @llvm.memset{{.*}}, !dbg [[G1R1:!.*]], !annotation
18+
// CHECK: store ptr %0, ptr %a{{.*}}, !dbg [[G1R1:!.*]]
19+
void *a = __builtin_alloca(4);
20+
21+
// CHECK: %1 = alloca i8, i64 4{{.*}}, !dbg [[G2R2:!.*]]
22+
// CHECK: call void @llvm.memset{{.*}}, !dbg [[G2R1:!.*]], !annotation
23+
// CHECK: store ptr %1, ptr %b{{.*}}, !dbg [[G2R1:!.*]]
24+
void *b = __builtin_alloca_with_align(4, 8);
25+
26+
// CHECK: %2 = load <4 x float>, ptr @mat{{.*}}, !dbg [[G3R2:!.*]]
27+
// CHECK: call void @llvm.matrix.column.major.store.v4f32{{.*}}, !dbg [[G3R1:!.*]]
28+
__builtin_matrix_column_major_store(mat, f4, sizeof(float) * 2);
29+
30+
// CHECK: call void @llvm.memset{{.*}}, !dbg [[G4R1:!.*]]
31+
__builtin_bzero(f4, sizeof(float) * 2);
32+
33+
// CHECK: call void @llvm.memmove{{.*}}, !dbg [[G5R1:!.*]]
34+
__builtin_bcopy(f4, f8, sizeof(float) * 4);
35+
36+
// CHECK: call void @llvm.memcpy{{.*}}, !dbg [[G6R1:!.*]]
37+
__builtin_memcpy(f4, f8, sizeof(float) * 4);
38+
39+
// CHECK: call void @llvm.memcpy{{.*}}, !dbg [[G7R1:!.*]]
40+
__builtin_mempcpy(f4, f8, sizeof(float) * 4);
41+
42+
// CHECK: call void @llvm.memcpy{{.*}}, !dbg [[G8R1:!.*]]
43+
__builtin_memcpy_inline(f4, f8, sizeof(float) * 4);
44+
45+
// CHECK: call void @llvm.memcpy{{.*}}, !dbg [[G9R1:!.*]]
46+
__builtin___memcpy_chk(f4, f8, sizeof(float) * 4, -1);
47+
48+
// CHECK: call void @llvm.memmove{{.*}}, !dbg [[G10R1:!.*]]
49+
__builtin___memmove_chk(f4, f8, sizeof(float) * 4, -1);
50+
51+
// CHECK: call void @llvm.memmove{{.*}}, !dbg [[G11R1:!.*]]
52+
__builtin_memmove(f4, f8, sizeof(float) * 4);
53+
54+
// CHECK: call void @llvm.memset{{.*}}, !dbg [[G12R1:!.*]]
55+
__builtin_memset(f4, 0, sizeof(float) * 4);
56+
57+
// CHECK: call void @llvm.memset{{.*}}, !dbg [[G13R1:!.*]]
58+
__builtin_memset_inline(f4, 0, sizeof(float) * 4);
59+
60+
// CHECK: call void @llvm.memset{{.*}}, !dbg [[G14R1:!.*]]
61+
__builtin___memset_chk(f4, 0, sizeof(float), -1);
62+
63+
// CHECK: %3 = load i32, ptr @v{{.*}}, !dbg [[G15R3:!.*]]
64+
// CHECK-NEXT: %4 = trunc i32 %3 to i8, !dbg [[G15R2:!.*]]
65+
// CHECK-NEXT: call void @llvm.memset{{.*}}, !dbg [[G15R1:!.*]]
66+
__builtin_memset(f4, v, sizeof(float) * 4);
67+
}
68+
69+
// CHECK: [[G1R2]] = !DILocation({{.*}}, atomGroup: 1, atomRank: 2)
70+
// CHECK: [[G1R1]] = !DILocation({{.*}}, atomGroup: 1, atomRank: 1)
71+
// CHECK: [[G2R2]] = !DILocation({{.*}}, atomGroup: 2, atomRank: 2)
72+
// CHECK: [[G2R1]] = !DILocation({{.*}}, atomGroup: 2, atomRank: 1)
73+
// CHECK: [[G3R2]] = !DILocation({{.*}}, atomGroup: 3, atomRank: 2)
74+
// CHECK: [[G3R1]] = !DILocation({{.*}}, atomGroup: 3, atomRank: 1)
75+
// CHECK: [[G4R1]] = !DILocation({{.*}}, atomGroup: 4, atomRank: 1)
76+
// CHECK: [[G5R1]] = !DILocation({{.*}}, atomGroup: 5, atomRank: 1)
77+
// CHECK: [[G6R1]] = !DILocation({{.*}}, atomGroup: 6, atomRank: 1)
78+
// CHECK: [[G7R1]] = !DILocation({{.*}}, atomGroup: 7, atomRank: 1)
79+
// CHECK: [[G8R1]] = !DILocation({{.*}}, atomGroup: 8, atomRank: 1)
80+
// CHECK: [[G9R1]] = !DILocation({{.*}}, atomGroup: 9, atomRank: 1)
81+
// CHECK: [[G10R1]] = !DILocation({{.*}}, atomGroup: 10, atomRank: 1)
82+
// CHECK: [[G11R1]] = !DILocation({{.*}}, atomGroup: 11, atomRank: 1)
83+
// CHECK: [[G12R1]] = !DILocation({{.*}}, atomGroup: 12, atomRank: 1)
84+
// CHECK: [[G13R1]] = !DILocation({{.*}}, atomGroup: 13, atomRank: 1)
85+
// CHECK: [[G14R1]] = !DILocation({{.*}}, atomGroup: 14, atomRank: 1)
86+
// CHECK: [[G15R3]] = !DILocation({{.*}}, atomGroup: 15, atomRank: 3)
87+
// CHECK: [[G15R2]] = !DILocation({{.*}}, atomGroup: 15, atomRank: 2)
88+
// CHECK: [[G15R1]] = !DILocation({{.*}}, atomGroup: 15, atomRank: 1)

flang/include/flang/Lower/AbstractConverter.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,10 @@ class AbstractConverter {
348348
virtual Fortran::lower::SymbolBox
349349
lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
350350

351+
/// Find the symbol in the inner-most level of the local map or return null.
352+
virtual Fortran::lower::SymbolBox
353+
shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0;
354+
351355
/// Return the mlir::SymbolTable associated to the ModuleOp.
352356
/// Look-ups are faster using it than using module.lookup<>,
353357
/// but the module op should be queried in case of failure

flang/include/flang/Optimizer/Dialect/FIROps.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@ class CoordinateIndicesAdaptor {
147147
mlir::ValueRange values;
148148
};
149149

150+
struct LocalitySpecifierOperands {
151+
llvm::SmallVector<::mlir::Value> privateVars;
152+
llvm::SmallVector<::mlir::Attribute> privateSyms;
153+
};
150154
} // namespace fir
151155

152156
#endif // FORTRAN_OPTIMIZER_DIALECT_FIROPS_H

flang/include/flang/Optimizer/Dialect/FIROps.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3605,6 +3605,21 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> {
36053605
];
36063606

36073607
let extraClassDeclaration = [{
3608+
mlir::BlockArgument getInitMoldArg() {
3609+
auto &region = getInitRegion();
3610+
return region.empty() ? nullptr : region.getArgument(0);
3611+
}
3612+
mlir::BlockArgument getInitPrivateArg() {
3613+
auto &region = getInitRegion();
3614+
return region.empty() ? nullptr : region.getArgument(1);
3615+
}
3616+
3617+
/// Returns true if the init region might read from the mold argument
3618+
bool initReadsFromMold() {
3619+
mlir::BlockArgument moldArg = getInitMoldArg();
3620+
return moldArg && !moldArg.use_empty();
3621+
}
3622+
36083623
/// Get the type for arguments to nested regions. This should
36093624
/// generally be either the same as getType() or some pointer
36103625
/// type (pointing to the type allocated by this op).

flang/lib/Lower/Bridge.cpp

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
#include "flang/Lower/Bridge.h"
1414

15+
#include "OpenMP/DataSharingProcessor.h"
16+
#include "OpenMP/Utils.h"
1517
#include "flang/Lower/Allocatable.h"
1618
#include "flang/Lower/CallInterface.h"
1719
#include "flang/Lower/Coarray.h"
@@ -1142,6 +1144,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
11421144
return name;
11431145
}
11441146

1147+
/// Find the symbol in the inner-most level of the local map or return null.
1148+
Fortran::lower::SymbolBox
1149+
shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override {
1150+
if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
1151+
return v;
1152+
return {};
1153+
}
1154+
11451155
private:
11461156
FirConverter() = delete;
11471157
FirConverter(const FirConverter &) = delete;
@@ -1216,14 +1226,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
12161226
return {};
12171227
}
12181228

1219-
/// Find the symbol in the inner-most level of the local map or return null.
1220-
Fortran::lower::SymbolBox
1221-
shallowLookupSymbol(const Fortran::semantics::Symbol &sym) {
1222-
if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
1223-
return v;
1224-
return {};
1225-
}
1226-
12271229
/// Find the symbol in one level up of symbol map such as for host-association
12281230
/// in OpenMP code or return null.
12291231
Fortran::lower::SymbolBox
@@ -2027,9 +2029,34 @@ class FirConverter : public Fortran::lower::AbstractConverter {
20272029
void handleLocalitySpecs(const IncrementLoopInfo &info) {
20282030
Fortran::semantics::SemanticsContext &semanticsContext =
20292031
bridge.getSemanticsContext();
2030-
for (const Fortran::semantics::Symbol *sym : info.localSymList)
2032+
// TODO Extract `DataSharingProcessor` from omp to a more general location.
2033+
Fortran::lower::omp::DataSharingProcessor dsp(
2034+
*this, semanticsContext, getEval(),
2035+
/*useDelayedPrivatization=*/true, localSymbols);
2036+
fir::LocalitySpecifierOperands privateClauseOps;
2037+
auto doConcurrentLoopOp =
2038+
mlir::dyn_cast_if_present<fir::DoConcurrentLoopOp>(info.loopOp);
2039+
// TODO Promote to using `enableDelayedPrivatization` (which is enabled by
2040+
// default unlike the staging flag) once the implementation of this is more
2041+
// complete.
2042+
bool useDelayedPriv =
2043+
enableDelayedPrivatizationStaging && doConcurrentLoopOp;
2044+
2045+
for (const Fortran::semantics::Symbol *sym : info.localSymList) {
2046+
if (useDelayedPriv) {
2047+
dsp.privatizeSymbol<fir::LocalitySpecifierOp>(sym, &privateClauseOps);
2048+
continue;
2049+
}
2050+
20312051
createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false);
2052+
}
2053+
20322054
for (const Fortran::semantics::Symbol *sym : info.localInitSymList) {
2055+
if (useDelayedPriv) {
2056+
dsp.privatizeSymbol<fir::LocalitySpecifierOp>(sym, &privateClauseOps);
2057+
continue;
2058+
}
2059+
20332060
createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true);
20342061
const auto *hostDetails =
20352062
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
@@ -2048,6 +2075,24 @@ class FirConverter : public Fortran::lower::AbstractConverter {
20482075
sym->detailsIf<Fortran::semantics::HostAssocDetails>();
20492076
copySymbolBinding(hostDetails->symbol(), *sym);
20502077
}
2078+
2079+
if (useDelayedPriv) {
2080+
doConcurrentLoopOp.getLocalVarsMutable().assign(
2081+
privateClauseOps.privateVars);
2082+
doConcurrentLoopOp.setLocalSymsAttr(
2083+
builder->getArrayAttr(privateClauseOps.privateSyms));
2084+
2085+
for (auto [sym, privateVar] : llvm::zip_equal(
2086+
dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) {
2087+
auto arg = doConcurrentLoopOp.getRegion().begin()->addArgument(
2088+
privateVar.getType(), doConcurrentLoopOp.getLoc());
2089+
bindSymbol(*sym, hlfir::translateToExtendedValue(
2090+
privateVar.getLoc(), *builder, hlfir::Entity{arg},
2091+
/*contiguousHint=*/true)
2092+
.first);
2093+
}
2094+
}
2095+
20512096
// Note that allocatable, types with ultimate components, and type
20522097
// requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130),
20532098
// so no clean-up needs to be generated for these entities.

0 commit comments

Comments
 (0)