Skip to content

Commit 6210d1f

Browse files
committed
Merge branch 'main' into xegpu_changes
2 parents ab59c46 + f83ef28 commit 6210d1f

File tree

108 files changed

+4313
-952
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

108 files changed

+4313
-952
lines changed

clang/include/clang/Basic/Builtins.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,8 @@ class Context {
408408

409409
unsigned getRequiredVectorWidth(unsigned ID) const;
410410

411-
/// Return true if builtin ID belongs to AuxTarget.
411+
/// Return true if the builtin ID belongs exclusively to the AuxTarget,
412+
/// and false if it belongs to both primary and aux target, or neither.
412413
bool isAuxBuiltinID(unsigned ID) const {
413414
return ID >= (Builtin::FirstTSBuiltin + NumTargetBuiltins);
414415
}

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,9 @@ Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer,
176176
// https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules
177177
static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
178178
QualType T) {
179-
unsigned Size = 0;
180179
constexpr unsigned CBufferAlign = 16;
181180
if (const RecordType *RT = T->getAs<RecordType>()) {
181+
unsigned Size = 0;
182182
const RecordDecl *RD = RT->getDecl();
183183
for (const FieldDecl *Field : RD->fields()) {
184184
QualType Ty = Field->getType();
@@ -191,22 +191,28 @@ static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
191191
Size = llvm::alignTo(Size, FieldAlign);
192192
Size += FieldSize;
193193
}
194-
} else if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) {
195-
if (unsigned ElementCount = AT->getSize().getZExtValue()) {
196-
unsigned ElementSize =
197-
calculateLegacyCbufferSize(Context, AT->getElementType());
198-
unsigned AlignedElementSize = llvm::alignTo(ElementSize, CBufferAlign);
199-
Size = AlignedElementSize * (ElementCount - 1) + ElementSize;
200-
}
201-
} else if (const VectorType *VT = T->getAs<VectorType>()) {
194+
return Size;
195+
}
196+
197+
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) {
198+
unsigned ElementCount = AT->getSize().getZExtValue();
199+
if (ElementCount == 0)
200+
return 0;
201+
202+
unsigned ElementSize =
203+
calculateLegacyCbufferSize(Context, AT->getElementType());
204+
unsigned AlignedElementSize = llvm::alignTo(ElementSize, CBufferAlign);
205+
return AlignedElementSize * (ElementCount - 1) + ElementSize;
206+
}
207+
208+
if (const VectorType *VT = T->getAs<VectorType>()) {
202209
unsigned ElementCount = VT->getNumElements();
203210
unsigned ElementSize =
204211
calculateLegacyCbufferSize(Context, VT->getElementType());
205-
Size = ElementSize * ElementCount;
206-
} else {
207-
Size = Context.getTypeSize(T) / 8;
212+
return ElementSize * ElementCount;
208213
}
209-
return Size;
214+
215+
return Context.getTypeSize(T) / 8;
210216
}
211217

212218
// Validate packoffset:

flang/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,17 @@ if (APPLE)
467467
endif()
468468
endif()
469469

470+
# Set up job pools for flang. Some of the flang sources take a lot of memory to
471+
# compile, so allow users to limit the number of parallel flang jobs. This is
472+
# useful for building flang alongside several other projects since you can use
473+
# the maximum number of build jobs for the other projects while limiting the
474+
# number of flang compile jobs.
475+
set(FLANG_PARALLEL_COMPILE_JOBS CACHE STRING
476+
"The maximum number of concurrent compilation jobs for Flang (Ninja only)")
477+
if (FLANG_PARALLEL_COMPILE_JOBS)
478+
set_property(GLOBAL APPEND PROPERTY JOB_POOLS flang_compile_job_pool=${FLANG_PARALLEL_COMPILE_JOBS})
479+
endif()
480+
470481
include(AddFlang)
471482
include(FlangCommon)
472483

flang/cmake/modules/AddFlang.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ function(add_flang_library name)
9494
set_property(GLOBAL APPEND PROPERTY FLANG_LIBS ${name})
9595
endif()
9696
set_property(GLOBAL APPEND PROPERTY FLANG_EXPORTS ${name})
97+
if (FLANG_PARALLEL_COMPILE_JOBS)
98+
set_property(TARGET ${name} PROPERTY JOB_POOL_COMPILE flang_compile_job_pool)
99+
endif()
97100
else()
98101
# Add empty "phony" target
99102
add_custom_target(${name})

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ struct IntrinsicLibrary {
336336
template <typename Shift>
337337
mlir::Value genMask(mlir::Type, llvm::ArrayRef<mlir::Value>);
338338
mlir::Value genMatchAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
339+
mlir::Value genMatchAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
339340
fir::ExtendedValue genMatmul(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
340341
fir::ExtendedValue genMatmulTranspose(mlir::Type,
341342
llvm::ArrayRef<fir::ExtendedValue>);

flang/lib/Lower/Bridge.cpp

Lines changed: 117 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3114,50 +3114,127 @@ class FirConverter : public Fortran::lower::AbstractConverter {
31143114
llvm::SmallVector<mlir::Value> ivValues;
31153115
Fortran::lower::pft::Evaluation *loopEval =
31163116
&getEval().getFirstNestedEvaluation();
3117-
for (unsigned i = 0; i < nestedLoops; ++i) {
3118-
const Fortran::parser::LoopControl *loopControl;
3119-
mlir::Location crtLoc = loc;
3120-
if (i == 0) {
3121-
loopControl = &*outerDoConstruct->GetLoopControl();
3122-
crtLoc =
3123-
genLocation(Fortran::parser::FindSourceLocation(outerDoConstruct));
3124-
} else {
3125-
auto *doCons = loopEval->getIf<Fortran::parser::DoConstruct>();
3126-
assert(doCons && "expect do construct");
3127-
loopControl = &*doCons->GetLoopControl();
3128-
crtLoc = genLocation(Fortran::parser::FindSourceLocation(*doCons));
3117+
if (outerDoConstruct->IsDoConcurrent()) {
3118+
// Handle DO CONCURRENT
3119+
locs.push_back(
3120+
genLocation(Fortran::parser::FindSourceLocation(outerDoConstruct)));
3121+
const Fortran::parser::LoopControl *loopControl =
3122+
&*outerDoConstruct->GetLoopControl();
3123+
const auto &concurrent =
3124+
std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
3125+
3126+
if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
3127+
.empty())
3128+
TODO(loc, "DO CONCURRENT with locality spec");
3129+
3130+
const auto &concurrentHeader =
3131+
std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
3132+
const auto &controls =
3133+
std::get<std::list<Fortran::parser::ConcurrentControl>>(
3134+
concurrentHeader.t);
3135+
3136+
for (const auto &control : controls) {
3137+
mlir::Value lb = fir::getBase(genExprValue(
3138+
*Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx));
3139+
mlir::Value ub = fir::getBase(genExprValue(
3140+
*Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx));
3141+
mlir::Value step;
3142+
3143+
if (const auto &expr =
3144+
std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
3145+
control.t))
3146+
step = fir::getBase(
3147+
genExprValue(*Fortran::semantics::GetExpr(*expr), stmtCtx));
3148+
else
3149+
step = builder->create<mlir::arith::ConstantIndexOp>(
3150+
loc, 1); // Use index type directly
3151+
3152+
// Ensure lb, ub, and step are of index type using fir.convert
3153+
mlir::Type indexType = builder->getIndexType();
3154+
lb = builder->create<fir::ConvertOp>(loc, indexType, lb);
3155+
ub = builder->create<fir::ConvertOp>(loc, indexType, ub);
3156+
step = builder->create<fir::ConvertOp>(loc, indexType, step);
3157+
3158+
lbs.push_back(lb);
3159+
ubs.push_back(ub);
3160+
steps.push_back(step);
3161+
3162+
const auto &name = std::get<Fortran::parser::Name>(control.t);
3163+
3164+
// Handle induction variable
3165+
mlir::Value ivValue = getSymbolAddress(*name.symbol);
3166+
std::size_t ivTypeSize = name.symbol->size();
3167+
if (ivTypeSize == 0)
3168+
llvm::report_fatal_error("unexpected induction variable size");
3169+
mlir::Type ivTy = builder->getIntegerType(ivTypeSize * 8);
3170+
3171+
if (!ivValue) {
3172+
// DO CONCURRENT induction variables are not mapped yet since they are
3173+
// local to the DO CONCURRENT scope.
3174+
mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint();
3175+
builder->setInsertionPointToStart(builder->getAllocaBlock());
3176+
ivValue = builder->createTemporaryAlloc(
3177+
loc, ivTy, toStringRef(name.symbol->name()));
3178+
builder->restoreInsertionPoint(insPt);
3179+
}
3180+
3181+
// Create the hlfir.declare operation using the symbol's name
3182+
auto declareOp = builder->create<hlfir::DeclareOp>(
3183+
loc, ivValue, toStringRef(name.symbol->name()));
3184+
ivValue = declareOp.getResult(0);
3185+
3186+
// Bind the symbol to the declared variable
3187+
bindSymbol(*name.symbol, ivValue);
3188+
ivValues.push_back(ivValue);
3189+
ivTypes.push_back(ivTy);
3190+
ivLocs.push_back(loc);
31293191
}
3192+
} else {
3193+
for (unsigned i = 0; i < nestedLoops; ++i) {
3194+
const Fortran::parser::LoopControl *loopControl;
3195+
mlir::Location crtLoc = loc;
3196+
if (i == 0) {
3197+
loopControl = &*outerDoConstruct->GetLoopControl();
3198+
crtLoc = genLocation(
3199+
Fortran::parser::FindSourceLocation(outerDoConstruct));
3200+
} else {
3201+
auto *doCons = loopEval->getIf<Fortran::parser::DoConstruct>();
3202+
assert(doCons && "expect do construct");
3203+
loopControl = &*doCons->GetLoopControl();
3204+
crtLoc = genLocation(Fortran::parser::FindSourceLocation(*doCons));
3205+
}
31303206

3131-
locs.push_back(crtLoc);
3132-
3133-
const Fortran::parser::LoopControl::Bounds *bounds =
3134-
std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
3135-
assert(bounds && "Expected bounds on the loop construct");
3136-
3137-
Fortran::semantics::Symbol &ivSym =
3138-
bounds->name.thing.symbol->GetUltimate();
3139-
ivValues.push_back(getSymbolAddress(ivSym));
3140-
3141-
lbs.push_back(builder->createConvert(
3142-
crtLoc, idxTy,
3143-
fir::getBase(genExprValue(*Fortran::semantics::GetExpr(bounds->lower),
3144-
stmtCtx))));
3145-
ubs.push_back(builder->createConvert(
3146-
crtLoc, idxTy,
3147-
fir::getBase(genExprValue(*Fortran::semantics::GetExpr(bounds->upper),
3148-
stmtCtx))));
3149-
if (bounds->step)
3150-
steps.push_back(builder->createConvert(
3207+
locs.push_back(crtLoc);
3208+
3209+
const Fortran::parser::LoopControl::Bounds *bounds =
3210+
std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
3211+
assert(bounds && "Expected bounds on the loop construct");
3212+
3213+
Fortran::semantics::Symbol &ivSym =
3214+
bounds->name.thing.symbol->GetUltimate();
3215+
ivValues.push_back(getSymbolAddress(ivSym));
3216+
3217+
lbs.push_back(builder->createConvert(
31513218
crtLoc, idxTy,
31523219
fir::getBase(genExprValue(
3153-
*Fortran::semantics::GetExpr(bounds->step), stmtCtx))));
3154-
else // If `step` is not present, assume it is `1`.
3155-
steps.push_back(builder->createIntegerConstant(loc, idxTy, 1));
3156-
3157-
ivTypes.push_back(idxTy);
3158-
ivLocs.push_back(crtLoc);
3159-
if (i < nestedLoops - 1)
3160-
loopEval = &*std::next(loopEval->getNestedEvaluations().begin());
3220+
*Fortran::semantics::GetExpr(bounds->lower), stmtCtx))));
3221+
ubs.push_back(builder->createConvert(
3222+
crtLoc, idxTy,
3223+
fir::getBase(genExprValue(
3224+
*Fortran::semantics::GetExpr(bounds->upper), stmtCtx))));
3225+
if (bounds->step)
3226+
steps.push_back(builder->createConvert(
3227+
crtLoc, idxTy,
3228+
fir::getBase(genExprValue(
3229+
*Fortran::semantics::GetExpr(bounds->step), stmtCtx))));
3230+
else // If `step` is not present, assume it is `1`.
3231+
steps.push_back(builder->createIntegerConstant(loc, idxTy, 1));
3232+
3233+
ivTypes.push_back(idxTy);
3234+
ivLocs.push_back(crtLoc);
3235+
if (i < nestedLoops - 1)
3236+
loopEval = &*std::next(loopEval->getNestedEvaluations().begin());
3237+
}
31613238
}
31623239

31633240
auto op = builder->create<cuf::KernelOp>(

flang/lib/Optimizer/Analysis/AliasAnalysis.cpp

Lines changed: 39 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
10-
#include "flang/Optimizer/CodeGen/CGOps.h"
1110
#include "flang/Optimizer/Dialect/FIROps.h"
1211
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
1312
#include "flang/Optimizer/Dialect/FIRType.h"
@@ -62,17 +61,13 @@ getOriginalDef(mlir::Value v,
6261
mlir::Type ty = defOp->getResultTypes()[0];
6362
llvm::TypeSwitch<Operation *>(defOp)
6463
.Case<fir::ConvertOp>([&](fir::ConvertOp op) { v = op.getValue(); })
65-
.Case<fir::DeclareOp, hlfir::DeclareOp, fir::cg::XDeclareOp>(
66-
[&](auto op) {
67-
v = op.getMemref();
68-
auto varIf =
69-
llvm::dyn_cast<fir::FortranVariableOpInterface>(defOp);
70-
if (varIf) {
71-
attributes |= getAttrsFromVariable(varIf);
72-
isCapturedInInternalProcedure |=
73-
varIf.isCapturedInInternalProcedure();
74-
}
75-
})
64+
.Case<fir::DeclareOp, hlfir::DeclareOp>([&](auto op) {
65+
v = op.getMemref();
66+
auto varIf = llvm::cast<fir::FortranVariableOpInterface>(defOp);
67+
attributes |= getAttrsFromVariable(varIf);
68+
isCapturedInInternalProcedure |=
69+
varIf.isCapturedInInternalProcedure();
70+
})
7671
.Case<fir::CoordinateOp>([&](auto op) {
7772
if (fir::AliasAnalysis::isPointerReference(ty))
7873
attributes.set(fir::AliasAnalysis::Attribute::Pointer);
@@ -596,21 +591,19 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
596591
followBoxData = true;
597592
approximateSource = true;
598593
})
599-
.Case<fir::EmboxOp, fir::ReboxOp, fir::cg::XEmboxOp, fir::cg::XReboxOp>(
600-
[&](auto op) {
601-
if (followBoxData) {
602-
v = op->getOperand(0);
603-
defOp = v.getDefiningOp();
604-
} else
605-
breakFromLoop = true;
606-
})
594+
.Case<fir::EmboxOp, fir::ReboxOp>([&](auto op) {
595+
if (followBoxData) {
596+
v = op->getOperand(0);
597+
defOp = v.getDefiningOp();
598+
} else
599+
breakFromLoop = true;
600+
})
607601
.Case<fir::LoadOp>([&](auto op) {
608602
// If load is inside target and it points to mapped item,
609603
// continue tracking.
610604
Operation *loadMemrefOp = op.getMemref().getDefiningOp();
611605
bool isDeclareOp =
612606
llvm::isa_and_present<fir::DeclareOp>(loadMemrefOp) ||
613-
llvm::isa_and_present<fir::cg::XDeclareOp>(loadMemrefOp) ||
614607
llvm::isa_and_present<hlfir::DeclareOp>(loadMemrefOp);
615608
if (isDeclareOp &&
616609
llvm::isa<omp::TargetOp>(loadMemrefOp->getParentOp())) {
@@ -673,8 +666,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
673666
global = llvm::cast<fir::AddrOfOp>(op).getSymbol();
674667
breakFromLoop = true;
675668
})
676-
.Case<hlfir::DeclareOp, fir::DeclareOp,
677-
fir::cg::XDeclareOp>([&](auto op) {
669+
.Case<hlfir::DeclareOp, fir::DeclareOp>([&](auto op) {
678670
bool isPrivateItem = false;
679671
if (omp::BlockArgOpenMPOpInterface argIface =
680672
dyn_cast<omp::BlockArgOpenMPOpInterface>(op->getParentOp())) {
@@ -708,33 +700,30 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
708700
return;
709701
}
710702
}
711-
auto varIf = llvm::dyn_cast<fir::FortranVariableOpInterface>(defOp);
712-
if (varIf) {
713-
// While going through a declare operation collect
714-
// the variable attributes from it. Right now, some
715-
// of the attributes are duplicated, e.g. a TARGET dummy
716-
// argument has the target attribute both on its declare
717-
// operation and on the entry block argument.
718-
// In case of host associated use, the declare operation
719-
// is the only carrier of the variable attributes,
720-
// so we have to collect them here.
721-
attributes |= getAttrsFromVariable(varIf);
722-
isCapturedInInternalProcedure |=
723-
varIf.isCapturedInInternalProcedure();
724-
if (varIf.isHostAssoc()) {
725-
// Do not track past such DeclareOp, because it does not
726-
// currently provide any useful information. The host associated
727-
// access will end up dereferencing the host association tuple,
728-
// so we may as well stop right now.
729-
v = defOp->getResult(0);
730-
// TODO: if the host associated variable is a dummy argument
731-
// of the host, I think, we can treat it as SourceKind::Argument
732-
// for the purpose of alias analysis inside the internal
733-
// procedure.
734-
type = SourceKind::HostAssoc;
735-
breakFromLoop = true;
736-
return;
737-
}
703+
auto varIf = llvm::cast<fir::FortranVariableOpInterface>(defOp);
704+
// While going through a declare operation collect
705+
// the variable attributes from it. Right now, some
706+
// of the attributes are duplicated, e.g. a TARGET dummy
707+
// argument has the target attribute both on its declare
708+
// operation and on the entry block argument.
709+
// In case of host associated use, the declare operation
710+
// is the only carrier of the variable attributes,
711+
// so we have to collect them here.
712+
attributes |= getAttrsFromVariable(varIf);
713+
isCapturedInInternalProcedure |=
714+
varIf.isCapturedInInternalProcedure();
715+
if (varIf.isHostAssoc()) {
716+
// Do not track past such DeclareOp, because it does not
717+
// currently provide any useful information. The host associated
718+
// access will end up dereferencing the host association tuple,
719+
// so we may as well stop right now.
720+
v = defOp->getResult(0);
721+
// TODO: if the host associated variable is a dummy argument
722+
// of the host, I think, we can treat it as SourceKind::Argument
723+
// for the purpose of alias analysis inside the internal procedure.
724+
type = SourceKind::HostAssoc;
725+
breakFromLoop = true;
726+
return;
738727
}
739728
if (getLastInstantiationPoint) {
740729
// Fetch only the innermost instantiation point.

0 commit comments

Comments
 (0)