Skip to content

Commit f66cbcf

Browse files
authored
Merge branch 'llvm:main' into counted-by-on-struct-pointers
2 parents 409b18a + 3001387 commit f66cbcf

File tree

37 files changed

+489
-350
lines changed

37 files changed

+489
-350
lines changed

clang/docs/LibASTImporter.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ Note, there may be several different ASTImporter objects which import into the s
468468
cxxRecordDecl(hasName("Y"), isDefinition()), ToUnit);
469469
ToYDef->dump();
470470
// An error is set for "ToYDef" in the shared state.
471-
Optional<ASTImportError> OptErr =
471+
std::optional<ASTImportError> OptErr =
472472
ImporterState->getImportDeclErrorIfAny(ToYDef);
473473
assert(OptErr);
474474

clang/lib/Sema/SemaARM.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1109,7 +1109,10 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
11091109
default: return false;
11101110
case AArch64::BI__builtin_arm_dmb:
11111111
case AArch64::BI__builtin_arm_dsb:
1112-
case AArch64::BI__builtin_arm_isb: l = 0; u = 15; break;
1112+
case AArch64::BI__builtin_arm_isb:
1113+
l = 0;
1114+
u = 15;
1115+
break;
11131116
case AArch64::BI__builtin_arm_tcancel: l = 0; u = 65535; break;
11141117
}
11151118

flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def cuf_KernelLaunchOp : cuf_Op<"kernel_launch", [CallOpInterface,
200200

201201
let arguments = (ins SymbolRefAttr:$callee, I32:$grid_x, I32:$grid_y,
202202
I32:$grid_z, I32:$block_x, I32:$block_y, I32:$block_z,
203-
Optional<I32>:$bytes, Optional<AnyIntegerType>:$stream,
203+
Optional<I32>:$bytes, Optional<fir_ReferenceType>:$stream,
204204
Variadic<AnyType>:$args, OptionalAttr<DictArrayAttr>:$arg_attrs,
205205
OptionalAttr<DictArrayAttr>:$res_attrs);
206206

@@ -237,6 +237,8 @@ def cuf_KernelLaunchOp : cuf_Op<"kernel_launch", [CallOpInterface,
237237
*this, getNbNoArgOperand(), getArgs().size() - 1);
238238
}
239239
}];
240+
241+
let hasVerifier = 1;
240242
}
241243

242244
def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,

flang/lib/Lower/ConvertCall.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -589,7 +589,7 @@ Fortran::lower::genCallOpAndResult(
589589

590590
mlir::Value stream; // stream is optional.
591591
if (caller.getCallDescription().chevrons().size() > 3)
592-
stream = fir::getBase(converter.genExprValue(
592+
stream = fir::getBase(converter.genExprAddr(
593593
caller.getCallDescription().chevrons()[3], stmtCtx));
594594

595595
builder.create<cuf::KernelLaunchOp>(

flang/lib/Lower/OpenACC.cpp

Lines changed: 111 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1804,27 +1804,38 @@ static void privatizeIv(Fortran::lower::AbstractConverter &converter,
18041804
builder.restoreInsertionPoint(insPt);
18051805
}
18061806

1807-
std::string recipeName =
1808-
fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
1809-
Fortran::lower::privatizationRecipePrefix);
1810-
auto recipe = Fortran::lower::createOrGetPrivateRecipe(
1811-
builder, recipeName, loc, ivValue.getType());
1807+
mlir::Operation *privateOp = nullptr;
1808+
for (auto privateVal : privateOperands) {
1809+
if (mlir::acc::getVar(privateVal.getDefiningOp()) == ivValue) {
1810+
privateOp = privateVal.getDefiningOp();
1811+
break;
1812+
}
1813+
}
18121814

1813-
std::stringstream asFortran;
1814-
asFortran << Fortran::lower::mangle::demangleName(toStringRef(sym.name()));
1815-
auto op = createDataEntryOp<mlir::acc::PrivateOp>(
1816-
builder, loc, ivValue, asFortran, {}, true, /*implicit=*/true,
1817-
mlir::acc::DataClause::acc_private, ivValue.getType(),
1818-
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
1815+
if (privateOp == nullptr) {
1816+
std::string recipeName =
1817+
fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
1818+
Fortran::lower::privatizationRecipePrefix);
1819+
auto recipe = Fortran::lower::createOrGetPrivateRecipe(
1820+
builder, recipeName, loc, ivValue.getType());
1821+
1822+
std::stringstream asFortran;
1823+
asFortran << Fortran::lower::mangle::demangleName(toStringRef(sym.name()));
1824+
auto op = createDataEntryOp<mlir::acc::PrivateOp>(
1825+
builder, loc, ivValue, asFortran, {}, true, /*implicit=*/true,
1826+
mlir::acc::DataClause::acc_private, ivValue.getType(),
1827+
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
1828+
privateOp = op.getOperation();
18191829

1820-
privateOperands.push_back(op.getAccVar());
1821-
privatizations.push_back(mlir::SymbolRefAttr::get(builder.getContext(),
1822-
recipe.getSymName().str()));
1830+
privateOperands.push_back(op.getAccVar());
1831+
privatizations.push_back(mlir::SymbolRefAttr::get(
1832+
builder.getContext(), recipe.getSymName().str()));
1833+
}
18231834

18241835
// Map the new private iv to its symbol for the scope of the loop. bindSymbol
18251836
// might create a hlfir.declare op, if so, we map its result in order to
18261837
// use the sym value in the scope.
1827-
converter.bindSymbol(sym, op.getAccVar());
1838+
converter.bindSymbol(sym, mlir::acc::getAccVar(privateOp));
18281839
auto privateValue = converter.getSymbolAddress(sym);
18291840
if (auto declareOp =
18301841
mlir::dyn_cast<hlfir::DeclareOp>(privateValue.getDefiningOp()))
@@ -1863,92 +1874,6 @@ static mlir::acc::LoopOp createLoopOp(
18631874
crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
18641875
builder.getContext(), mlir::acc::DeviceType::None));
18651876

1866-
llvm::SmallVector<mlir::Type> ivTypes;
1867-
llvm::SmallVector<mlir::Location> ivLocs;
1868-
llvm::SmallVector<bool> inclusiveBounds;
1869-
1870-
llvm::SmallVector<mlir::Location> locs;
1871-
locs.push_back(currentLocation); // Location of the directive
1872-
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
1873-
bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
1874-
if (isDoConcurrent) {
1875-
locs.push_back(converter.genLocation(
1876-
Fortran::parser::FindSourceLocation(outerDoConstruct)));
1877-
const Fortran::parser::LoopControl *loopControl =
1878-
&*outerDoConstruct.GetLoopControl();
1879-
const auto &concurrent =
1880-
std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
1881-
if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
1882-
.empty())
1883-
TODO(currentLocation, "DO CONCURRENT with locality spec");
1884-
1885-
const auto &concurrentHeader =
1886-
std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
1887-
const auto &controls =
1888-
std::get<std::list<Fortran::parser::ConcurrentControl>>(
1889-
concurrentHeader.t);
1890-
for (const auto &control : controls) {
1891-
lowerbounds.push_back(fir::getBase(converter.genExprValue(
1892-
*Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
1893-
upperbounds.push_back(fir::getBase(converter.genExprValue(
1894-
*Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
1895-
if (const auto &expr =
1896-
std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
1897-
control.t))
1898-
steps.push_back(fir::getBase(converter.genExprValue(
1899-
*Fortran::semantics::GetExpr(*expr), stmtCtx)));
1900-
else // If `step` is not present, assume it is `1`.
1901-
steps.push_back(builder.createIntegerConstant(
1902-
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
1903-
1904-
const auto &name = std::get<Fortran::parser::Name>(control.t);
1905-
privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
1906-
privateOperands, ivPrivate, privatizations, isDoConcurrent);
1907-
1908-
inclusiveBounds.push_back(true);
1909-
}
1910-
} else {
1911-
int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
1912-
for (unsigned i = 0; i < collapseValue; ++i) {
1913-
const Fortran::parser::LoopControl *loopControl;
1914-
if (i == 0) {
1915-
loopControl = &*outerDoConstruct.GetLoopControl();
1916-
locs.push_back(converter.genLocation(
1917-
Fortran::parser::FindSourceLocation(outerDoConstruct)));
1918-
} else {
1919-
auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
1920-
assert(doCons && "expect do construct");
1921-
loopControl = &*doCons->GetLoopControl();
1922-
locs.push_back(converter.genLocation(
1923-
Fortran::parser::FindSourceLocation(*doCons)));
1924-
}
1925-
1926-
const Fortran::parser::LoopControl::Bounds *bounds =
1927-
std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
1928-
assert(bounds && "Expected bounds on the loop construct");
1929-
lowerbounds.push_back(fir::getBase(converter.genExprValue(
1930-
*Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
1931-
upperbounds.push_back(fir::getBase(converter.genExprValue(
1932-
*Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
1933-
if (bounds->step)
1934-
steps.push_back(fir::getBase(converter.genExprValue(
1935-
*Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
1936-
else // If `step` is not present, assume it is `1`.
1937-
steps.push_back(builder.createIntegerConstant(
1938-
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
1939-
1940-
Fortran::semantics::Symbol &ivSym =
1941-
bounds->name.thing.symbol->GetUltimate();
1942-
privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
1943-
privateOperands, ivPrivate, privatizations);
1944-
1945-
inclusiveBounds.push_back(true);
1946-
1947-
if (i < collapseValue - 1)
1948-
crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
1949-
}
1950-
}
1951-
19521877
for (const Fortran::parser::AccClause &clause : accClauseList.v) {
19531878
mlir::Location clauseLocation = converter.genLocation(clause.source);
19541879
if (const auto *gangClause =
@@ -2101,6 +2026,91 @@ static mlir::acc::LoopOp createLoopOp(
21012026
}
21022027
}
21032028

2029+
llvm::SmallVector<mlir::Type> ivTypes;
2030+
llvm::SmallVector<mlir::Location> ivLocs;
2031+
llvm::SmallVector<bool> inclusiveBounds;
2032+
llvm::SmallVector<mlir::Location> locs;
2033+
locs.push_back(currentLocation); // Location of the directive
2034+
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
2035+
bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
2036+
if (isDoConcurrent) {
2037+
locs.push_back(converter.genLocation(
2038+
Fortran::parser::FindSourceLocation(outerDoConstruct)));
2039+
const Fortran::parser::LoopControl *loopControl =
2040+
&*outerDoConstruct.GetLoopControl();
2041+
const auto &concurrent =
2042+
std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
2043+
if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
2044+
.empty())
2045+
TODO(currentLocation, "DO CONCURRENT with locality spec");
2046+
2047+
const auto &concurrentHeader =
2048+
std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
2049+
const auto &controls =
2050+
std::get<std::list<Fortran::parser::ConcurrentControl>>(
2051+
concurrentHeader.t);
2052+
for (const auto &control : controls) {
2053+
lowerbounds.push_back(fir::getBase(converter.genExprValue(
2054+
*Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
2055+
upperbounds.push_back(fir::getBase(converter.genExprValue(
2056+
*Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
2057+
if (const auto &expr =
2058+
std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
2059+
control.t))
2060+
steps.push_back(fir::getBase(converter.genExprValue(
2061+
*Fortran::semantics::GetExpr(*expr), stmtCtx)));
2062+
else // If `step` is not present, assume it is `1`.
2063+
steps.push_back(builder.createIntegerConstant(
2064+
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
2065+
2066+
const auto &name = std::get<Fortran::parser::Name>(control.t);
2067+
privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
2068+
privateOperands, ivPrivate, privatizations, isDoConcurrent);
2069+
2070+
inclusiveBounds.push_back(true);
2071+
}
2072+
} else {
2073+
int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
2074+
for (unsigned i = 0; i < collapseValue; ++i) {
2075+
const Fortran::parser::LoopControl *loopControl;
2076+
if (i == 0) {
2077+
loopControl = &*outerDoConstruct.GetLoopControl();
2078+
locs.push_back(converter.genLocation(
2079+
Fortran::parser::FindSourceLocation(outerDoConstruct)));
2080+
} else {
2081+
auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
2082+
assert(doCons && "expect do construct");
2083+
loopControl = &*doCons->GetLoopControl();
2084+
locs.push_back(converter.genLocation(
2085+
Fortran::parser::FindSourceLocation(*doCons)));
2086+
}
2087+
2088+
const Fortran::parser::LoopControl::Bounds *bounds =
2089+
std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
2090+
assert(bounds && "Expected bounds on the loop construct");
2091+
lowerbounds.push_back(fir::getBase(converter.genExprValue(
2092+
*Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
2093+
upperbounds.push_back(fir::getBase(converter.genExprValue(
2094+
*Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
2095+
if (bounds->step)
2096+
steps.push_back(fir::getBase(converter.genExprValue(
2097+
*Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
2098+
else // If `step` is not present, assume it is `1`.
2099+
steps.push_back(builder.createIntegerConstant(
2100+
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
2101+
2102+
Fortran::semantics::Symbol &ivSym =
2103+
bounds->name.thing.symbol->GetUltimate();
2104+
privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
2105+
privateOperands, ivPrivate, privatizations);
2106+
2107+
inclusiveBounds.push_back(true);
2108+
2109+
if (i < collapseValue - 1)
2110+
crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
2111+
}
2112+
}
2113+
21042114
// Prepare the operand segment size attribute and the operands value range.
21052115
llvm::SmallVector<mlir::Value> operands;
21062116
llvm::SmallVector<int32_t> operandSegments;

flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,24 @@ llvm::LogicalResult cuf::DeallocateOp::verify() {
139139
return mlir::success();
140140
}
141141

142+
//===----------------------------------------------------------------------===//
143+
// KernelLaunchOp
144+
//===----------------------------------------------------------------------===//
145+
146+
template <typename OpTy>
147+
static llvm::LogicalResult checkStreamType(OpTy op) {
148+
if (!op.getStream())
149+
return mlir::success();
150+
auto refTy = mlir::dyn_cast<fir::ReferenceType>(op.getStream().getType());
151+
if (!refTy.getEleTy().isInteger(64))
152+
return op.emitOpError("stream is expected to be a i64 reference");
153+
return mlir::success();
154+
}
155+
156+
llvm::LogicalResult cuf::KernelLaunchOp::verify() {
157+
return checkStreamType(*this);
158+
}
159+
142160
//===----------------------------------------------------------------------===//
143161
// KernelOp
144162
//===----------------------------------------------------------------------===//
@@ -324,10 +342,7 @@ void cuf::SharedMemoryOp::build(
324342
//===----------------------------------------------------------------------===//
325343

326344
llvm::LogicalResult cuf::StreamCastOp::verify() {
327-
auto refTy = mlir::dyn_cast<fir::ReferenceType>(getStream().getType());
328-
if (!refTy.getEleTy().isInteger(64))
329-
return emitOpError("stream is expected to be a i64 reference");
330-
return mlir::success();
345+
return checkStreamType(*this);
331346
}
332347

333348
// Tablegen operators

flang/lib/Optimizer/Transforms/CUFOpConversion.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -879,8 +879,13 @@ struct CUFLaunchOpConversion
879879
gpuLaunchOp.getClusterSizeYMutable().assign(clusterDimY);
880880
gpuLaunchOp.getClusterSizeZMutable().assign(clusterDimZ);
881881
}
882-
if (op.getStream())
883-
gpuLaunchOp.getAsyncObjectMutable().assign(op.getStream());
882+
if (op.getStream()) {
883+
mlir::OpBuilder::InsertionGuard guard(rewriter);
884+
rewriter.setInsertionPoint(gpuLaunchOp);
885+
mlir::Value stream =
886+
rewriter.create<cuf::StreamCastOp>(loc, op.getStream());
887+
gpuLaunchOp.getAsyncDependenciesMutable().append(stream);
888+
}
884889
if (procAttr)
885890
gpuLaunchOp->setAttr(cuf::getProcAttrName(), procAttr);
886891
rewriter.replaceOp(op, gpuLaunchOp);
@@ -933,6 +938,7 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
933938
/*forceUnifiedTBAATree=*/false, *dl);
934939
target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect,
935940
mlir::gpu::GPUDialect>();
941+
target.addLegalOp<cuf::StreamCastOp>();
936942
cuf::populateCUFToFIRConversionPatterns(typeConverter, *dl, symtab,
937943
patterns);
938944
if (mlir::failed(mlir::applyPartialConversion(getOperation(), target,

flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -823,17 +823,9 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
823823
if (maskRank == 0) {
824824
mlir::Type i1Type = builder.getI1Type();
825825
mlir::Type logical = maskElemType;
826-
mlir::IndexType idxTy = builder.getIndexType();
827-
828-
fir::SequenceType::Shape singleElement(1, 1);
829-
mlir::Type arrTy = fir::SequenceType::get(singleElement, logical);
830-
mlir::Type boxArrTy = fir::BoxType::get(arrTy);
831-
mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, mask);
832-
833-
mlir::Value indx = builder.createIntegerConstant(loc, idxTy, 0);
834826
mlir::Type logicalRefTy = builder.getRefType(logical);
835827
mlir::Value condAddr =
836-
builder.create<fir::CoordinateOp>(loc, logicalRefTy, array, indx);
828+
builder.create<fir::BoxAddrOp>(loc, logicalRefTy, mask);
837829
mlir::Value cond = builder.create<fir::LoadOp>(loc, condAddr);
838830
mlir::Value condI1 = builder.create<fir::ConvertOp>(loc, i1Type, cond);
839831

flang/test/Fir/CUDA/cuda-launch.fir

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,13 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e
146146
%1:2 = hlfir.declare %0 {uniq_name = "_QMtest_callFhostEstream"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
147147
%c1_i32 = arith.constant 1 : i32
148148
%c0_i32 = arith.constant 0 : i32
149-
%2 = fir.load %1#0 : !fir.ref<i64>
150-
cuf.kernel_launch @_QMdevptrPtest<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c0_i32, %2 : i64>>>()
149+
cuf.kernel_launch @_QMdevptrPtest<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c0_i32, %1#0 : !fir.ref<i64>>>>()
151150
return
152151
}
153152
}
154153

155154
// CHECK-LABEL: func.func @_QQmain()
156155
// CHECK: %[[STREAM:.*]] = fir.alloca i64 {bindc_name = "stream", uniq_name = "_QMtest_callFhostEstream"}
157156
// CHECK: %[[DECL_STREAM:.*]]:2 = hlfir.declare %[[STREAM]] {uniq_name = "_QMtest_callFhostEstream"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
158-
// CHECK: %[[STREAM_LOADED:.*]] = fir.load %[[DECL_STREAM]]#0 : !fir.ref<i64>
159-
// CHECK: gpu.launch_func <%[[STREAM_LOADED]] : i64> @cuda_device_mod::@_QMdevptrPtest
157+
// CHECK: %[[TOKEN:.*]] = cuf.stream_cast %[[DECL_STREAM]]#0 : <i64>
158+
// CHECK: gpu.launch_func [%[[TOKEN]]] @cuda_device_mod::@_QMdevptrPtest

0 commit comments

Comments
 (0)