Skip to content

Commit dcec016

Browse files
committed
Allow multiple tile clauses, fix multiple 'collapse' checking
There is no restriction on multiple tile clauses. There is perhaps some goofy behavioral decisions to make during codegen to decide which one wins, but here it isn't too bad. Multiple 'collapse' clauses have a similar problem, but with more difficult checking (since we have to take the instantiation status into consideration).
1 parent 8fb440a commit dcec016

File tree

7 files changed

+142
-54
lines changed

7 files changed

+142
-54
lines changed

clang/include/clang/Sema/SemaOpenACC.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class SemaOpenACC : public SemaBase {
8181
/// Collapse has an 'N' count that makes it apply to a number of loops 'below'
8282
/// it.
8383
struct CollapseCheckingInfo {
84-
OpenACCCollapseClause *ActiveCollapse = nullptr;
84+
const OpenACCCollapseClause *ActiveCollapse = nullptr;
8585

8686
/// This is a value that maintains the current value of the 'N' on the
8787
/// current collapse, minus the depth that has already been traversed. When
@@ -110,7 +110,7 @@ class SemaOpenACC : public SemaBase {
110110
/// own counting of elements.
111111
UnsignedOrNone CurTileCount = std::nullopt;
112112

113-
/// Records whether we've hit a 'CurTileCount' of '0' on the wya down,
113+
/// Records whether we've hit a 'CurTileCount' of '0' on the way down,
114114
/// which allows us to diagnose if the number of arguments is too large for
115115
/// the current number of 'for' loops.
116116
bool TileDepthSatisfied = true;

clang/lib/AST/OpenACCClause.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -190,18 +190,15 @@ OpenACCCollapseClause::OpenACCCollapseClause(SourceLocation BeginLoc,
190190
SourceLocation EndLoc)
191191
: OpenACCClauseWithSingleIntExpr(OpenACCClauseKind::Collapse, BeginLoc,
192192
LParenLoc, LoopCount, EndLoc),
193-
HasForce(HasForce) {
194-
assert(LoopCount && "LoopCount required");
195-
}
193+
HasForce(HasForce) {}
196194

197195
OpenACCCollapseClause *
198196
OpenACCCollapseClause::Create(const ASTContext &C, SourceLocation BeginLoc,
199197
SourceLocation LParenLoc, bool HasForce,
200198
Expr *LoopCount, SourceLocation EndLoc) {
201-
assert(
202-
LoopCount &&
203-
(LoopCount->isInstantiationDependent() || isa<ConstantExpr>(LoopCount)) &&
204-
"Loop count not constant expression");
199+
assert((!LoopCount || (LoopCount->isInstantiationDependent() ||
200+
isa<ConstantExpr>(LoopCount))) &&
201+
"Loop count not constant expression");
205202
void *Mem =
206203
C.Allocate(sizeof(OpenACCCollapseClause), alignof(OpenACCCollapseClause));
207204
return new (Mem)

clang/lib/Sema/SemaOpenACC.cpp

Lines changed: 95 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -221,50 +221,106 @@ SemaOpenACC::AssociatedStmtRAII::AssociatedStmtRAII(
221221
}
222222
}
223223

224+
namespace {
225+
// Given two collapse clauses, and the uninstanted version of the new one,
226+
// return the 'best' one for the purposes of setting the collapse checking
227+
// values.
228+
const OpenACCCollapseClause *
229+
getBestCollapseCandidate(const OpenACCCollapseClause *Old,
230+
const OpenACCCollapseClause *New,
231+
const OpenACCCollapseClause *UnInstNew) {
232+
// If the loop count is nullptr, it is because instantiation failed, so this
233+
// can't be the best one.
234+
if (!New->getLoopCount())
235+
return Old;
236+
237+
// If the loop-count had an error, than 'new' isn't a candidate.
238+
if (!New->getLoopCount())
239+
return Old;
240+
241+
// Don't consider uninstantiated ones, since we can't really check these.
242+
if (New->getLoopCount()->isInstantiationDependent())
243+
return Old;
244+
245+
// If this is an instantiation, and the old version wasn't instantation
246+
// dependent, than nothing has changed and we've already done a diagnostic
247+
// based on this one, so don't consider it.
248+
if (UnInstNew && !UnInstNew->getLoopCount()->isInstantiationDependent())
249+
return Old;
250+
251+
// New is now a valid candidate, so if there isn't an old one at this point,
252+
// New is the only valid one.
253+
if (!Old)
254+
return New;
255+
256+
// If the 'New' expression has a larger value than 'Old', then it is the new
257+
// best candidate.
258+
if (cast<ConstantExpr>(Old->getLoopCount())->getResultAsAPSInt() <
259+
cast<ConstantExpr>(New->getLoopCount())->getResultAsAPSInt())
260+
return New;
261+
262+
return Old;
263+
}
264+
} // namespace
265+
224266
void SemaOpenACC::AssociatedStmtRAII::SetCollapseInfoBeforeAssociatedStmt(
225267
ArrayRef<const OpenACCClause *> UnInstClauses,
226268
ArrayRef<OpenACCClause *> Clauses) {
227269

228270
// Reset this checking for loops that aren't covered in a RAII object.
229271
SemaRef.LoopInfo.CurLevelHasLoopAlready = false;
230272
SemaRef.CollapseInfo.CollapseDepthSatisfied = true;
273+
SemaRef.CollapseInfo.CurCollapseCount = 0;
231274
SemaRef.TileInfo.TileDepthSatisfied = true;
232275

233276
// We make sure to take an optional list of uninstantiated clauses, so that
234277
// we can check to make sure we don't 'double diagnose' in the event that
235-
// the value of 'N' was not dependent in a template. We also ensure during
236-
// Sema that there is only 1 collapse on each construct, so we can count on
237-
// the fact that if both find a 'collapse', that they are the same one.
238-
auto *CollapseClauseItr =
239-
llvm::find_if(Clauses, llvm::IsaPred<OpenACCCollapseClause>);
240-
auto *UnInstCollapseClauseItr =
278+
// the value of 'N' was not dependent in a template. Since we cannot count on
279+
// there only being a single collapse clause, we count on the order to make
280+
// sure get the matching ones, and we count on TreeTransform not removing
281+
// these, even if loop-count instantiation failed. We can check the
282+
// non-dependent ones right away, and realize that subsequent instantiation
283+
// can only make it more specific.
284+
285+
auto *UnInstClauseItr =
241286
llvm::find_if(UnInstClauses, llvm::IsaPred<OpenACCCollapseClause>);
242-
243-
if (Clauses.end() == CollapseClauseItr)
244-
return;
245-
246-
OpenACCCollapseClause *CollapseClause =
247-
cast<OpenACCCollapseClause>(*CollapseClauseItr);
248-
249-
SemaRef.CollapseInfo.ActiveCollapse = CollapseClause;
250-
Expr *LoopCount = CollapseClause->getLoopCount();
251-
252-
// If the loop count is still instantiation dependent, setting the depth
253-
// counter isn't necessary, so return here.
254-
if (!LoopCount || LoopCount->isInstantiationDependent())
255-
return;
256-
257-
// Suppress diagnostics if we've done a 'transform' where the previous version
258-
// wasn't dependent, meaning we already diagnosed it.
259-
if (UnInstCollapseClauseItr != UnInstClauses.end() &&
260-
!cast<OpenACCCollapseClause>(*UnInstCollapseClauseItr)
261-
->getLoopCount()
262-
->isInstantiationDependent())
287+
auto *ClauseItr =
288+
llvm::find_if(Clauses, llvm::IsaPred<OpenACCCollapseClause>);
289+
const OpenACCCollapseClause *FoundClause = nullptr;
290+
291+
// Loop through the list of Collapse clauses and find the one that:
292+
// 1- Has a non-dependent, non-null loop count (null means error, likely
293+
// during instantiation).
294+
// 2- If UnInstClauses isn't empty, its corresponding
295+
// loop count was dependent.
296+
// 3- Has the largest 'loop count' of all.
297+
while (ClauseItr != Clauses.end()) {
298+
const OpenACCCollapseClause *CurClause =
299+
cast<OpenACCCollapseClause>(*ClauseItr);
300+
const OpenACCCollapseClause *UnInstCurClause =
301+
UnInstClauseItr == UnInstClauses.end()
302+
? nullptr
303+
: cast<OpenACCCollapseClause>(*UnInstClauseItr);
304+
305+
FoundClause =
306+
getBestCollapseCandidate(FoundClause, CurClause, UnInstCurClause);
307+
308+
UnInstClauseItr =
309+
UnInstClauseItr == UnInstClauses.end()
310+
? UnInstClauseItr
311+
: std::find_if(std::next(UnInstClauseItr), UnInstClauses.end(),
312+
llvm::IsaPred<OpenACCCollapseClause>);
313+
ClauseItr = std::find_if(std::next(ClauseItr), Clauses.end(),
314+
llvm::IsaPred<OpenACCCollapseClause>);
315+
}
316+
317+
if (!FoundClause)
263318
return;
264319

320+
SemaRef.CollapseInfo.ActiveCollapse = FoundClause;
265321
SemaRef.CollapseInfo.CollapseDepthSatisfied = false;
266322
SemaRef.CollapseInfo.CurCollapseCount =
267-
cast<ConstantExpr>(LoopCount)->getResultAsAPSInt();
323+
cast<ConstantExpr>(FoundClause->getLoopCount())->getResultAsAPSInt();
268324
SemaRef.CollapseInfo.DirectiveKind = DirKind;
269325
}
270326

@@ -283,6 +339,17 @@ void SemaOpenACC::AssociatedStmtRAII::SetTileInfoBeforeAssociatedStmt(
283339
return;
284340

285341
OpenACCTileClause *TileClause = cast<OpenACCTileClause>(*TileClauseItr);
342+
343+
// Multiple tile clauses are allowed, so ensure that we use the one with the
344+
// largest 'tile count'.
345+
while (Clauses.end() !=
346+
(TileClauseItr = std::find_if(std::next(TileClauseItr), Clauses.end(),
347+
llvm::IsaPred<OpenACCTileClause>))) {
348+
OpenACCTileClause *NewClause = cast<OpenACCTileClause>(*TileClauseItr);
349+
if (NewClause->getSizeExprs().size() > TileClause->getSizeExprs().size())
350+
TileClause = NewClause;
351+
}
352+
286353
SemaRef.TileInfo.ActiveTile = TileClause;
287354
SemaRef.TileInfo.TileDepthSatisfied = false;
288355
SemaRef.TileInfo.CurTileCount =

clang/lib/Sema/TreeTransform.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12344,9 +12344,6 @@ void OpenACCClauseTransform<Derived>::VisitCollapseClause(
1234412344
NewLoopCount =
1234512345
Self.getSema().OpenACC().CheckCollapseLoopCount(NewLoopCount.get());
1234612346

12347-
if (!NewLoopCount.isUsable())
12348-
return;
12349-
1235012347
ParsedClause.setCollapseDetails(C.hasForce(), NewLoopCount.get());
1235112348
NewClause = OpenACCCollapseClause::Create(
1235212349
Self.getSema().getASTContext(), ParsedClause.getBeginLoc(),

clang/test/SemaOpenACC/combined-construct-tile-clause.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,6 @@ void only_for_loops() {
143143
do{}while(true);
144144
}
145145

146-
void only_one_on_loop() {
147-
// expected-error@+2{{OpenACC 'tile' clause cannot appear more than once on a 'serial loop' directive}}
148-
// expected-note@+1{{previous clause is here}}
149-
#pragma acc serial loop tile(1) tile(1)
150-
for(int i = 0; i < 5; ++i);
151-
}
152-
153146
template<unsigned Val>
154147
void depth_too_high_templ() {
155148
// expected-error@+1{{'tile' clause specifies a loop count greater than the number of available loops}}

clang/test/SemaOpenACC/loop-construct-collapse-clause.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,3 +493,30 @@ void intervening_without_force() {
493493
for(;;);
494494
}
495495

496+
template<unsigned N>
497+
void allow_multiple_collapse_templ() {
498+
// expected-error@+2{{'collapse' clause specifies a loop count greater than the number of available loops}}
499+
// expected-note@+1{{active 'collapse' clause defined here}}
500+
#pragma acc loop collapse(N) collapse(N+1)
501+
for(unsigned i = 0; i < 5; ++i)
502+
for(unsigned j = 0; j < 5; ++j);
503+
// expected-error@+2{{'collapse' clause specifies a loop count greater than the number of available loops}}
504+
// expected-note@+1{{active 'collapse' clause defined here}}
505+
#pragma acc loop collapse(N+1) collapse(N)
506+
for(unsigned i = 0; i < 5; ++i)
507+
for(unsigned j = 0; j < 5; ++j);
508+
}
509+
void allow_multiple_collapse() {
510+
allow_multiple_collapse_templ<2>(); // expected-note{{in instantiation}}
511+
512+
// expected-error@+2{{'collapse' clause specifies a loop count greater than the number of available loops}}
513+
// expected-note@+1{{active 'collapse' clause defined here}}
514+
#pragma acc loop collapse(2) collapse(3)
515+
for(unsigned i = 0; i < 5; ++i)
516+
for(unsigned j = 0; j < 5; ++j);
517+
// expected-error@+2{{'collapse' clause specifies a loop count greater than the number of available loops}}
518+
// expected-note@+1{{active 'collapse' clause defined here}}
519+
#pragma acc loop collapse(3) collapse(2)
520+
for(unsigned i = 0; i < 5; ++i)
521+
for(unsigned j = 0; j < 5; ++j);
522+
}

clang/test/SemaOpenACC/loop-construct-tile-clause.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,6 @@ void only_for_loops() {
143143
do{}while(true);
144144
}
145145

146-
void only_one_on_loop() {
147-
// expected-error@+2{{OpenACC 'tile' clause cannot appear more than once on a 'loop' directive}}
148-
// expected-note@+1{{previous clause is here}}
149-
#pragma acc loop tile(1) tile(1)
150-
for(int i = 0; i < 5; ++i);
151-
}
152-
153146
template<unsigned Val>
154147
void depth_too_high_templ() {
155148
// expected-error@+1{{'tile' clause specifies a loop count greater than the number of available loops}}
@@ -384,6 +377,20 @@ void intervening() {
384377
}
385378
}
386379

380+
void use_largest_tile() {
381+
// expected-error@+2{{'tile' clause specifies a loop count greater than the number of available loops}}
382+
// expected-note@+1{{active 'tile' clause defined here}}
383+
#pragma acc loop tile(1,2) tile (3,4,5)
384+
for(int i = 0; i < 5; ++i)
385+
for (int j = 0; j < 5; ++j);
386+
387+
// expected-error@+2{{'tile' clause specifies a loop count greater than the number of available loops}}
388+
// expected-note@+1{{active 'tile' clause defined here}}
389+
#pragma acc loop tile (3,4,5) tile(1,2)
390+
for(int i = 0; i < 5; ++i)
391+
for (int j = 0; j < 5; ++j);
392+
}
393+
387394
void collapse_tile_depth() {
388395
// expected-error@+4{{'collapse' clause specifies a loop count greater than the number of available loops}}
389396
// expected-note@+3{{active 'collapse' clause defined here}}

0 commit comments

Comments
 (0)