Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,13 @@ class OpenACCClauseCIREmitter final

// Handle a clause affected by the 'device-type' to the point that they need
// to have the attributes added in the correct/corresponding order, such as
// 'num_workers' or 'vector_length' on a compute construct.
mlir::ArrayAttr
handleDeviceTypeAffectedClause(mlir::ArrayAttr existingDeviceTypes,
mlir::Value argument,
mlir::MutableOperandRange &argCollection) {
// 'num_workers' or 'vector_length' on a compute construct. For cases where we
// don't have an argument that needs to be added to an additional one (such as
// asyncOnly) we can use this with 'argument' as std::nullopt.
mlir::ArrayAttr handleDeviceTypeAffectedClause(
mlir::ArrayAttr existingDeviceTypes,
std::optional<mlir::Value> argument = std::nullopt,
mlir::MutableOperandRange *argCollection = nullptr) {
llvm::SmallVector<mlir::Attribute> deviceTypes;

// Collect the 'existing' device-type attributes so we can re-create them
Expand All @@ -120,13 +122,19 @@ class OpenACCClauseCIREmitter final
lastDeviceTypeClause->getArchitectures()) {
deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
builder.getContext(), decodeDeviceType(arch.getIdentifierInfo())));
argCollection.append(argument);
if (argument) {
assert(argCollection);
argCollection->append(*argument);
}
}
} else {
// Else, we just add a single for 'none'.
deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
builder.getContext(), mlir::acc::DeviceType::None));
argCollection.append(argument);
if (argument) {
assert(argCollection);
argCollection->append(*argument);
}
}

return mlir::ArrayAttr::get(builder.getContext(), deviceTypes);
Expand Down Expand Up @@ -205,7 +213,7 @@ class OpenACCClauseCIREmitter final
mlir::MutableOperandRange range = operation.getNumWorkersMutable();
operation.setNumWorkersDeviceTypeAttr(handleDeviceTypeAffectedClause(
operation.getNumWorkersDeviceTypeAttr(),
createIntExpr(clause.getIntExpr()), range));
createIntExpr(clause.getIntExpr()), &range));
} else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
llvm_unreachable("num_workers not valid on serial");
} else {
Expand All @@ -218,14 +226,30 @@ class OpenACCClauseCIREmitter final
mlir::MutableOperandRange range = operation.getVectorLengthMutable();
operation.setVectorLengthDeviceTypeAttr(handleDeviceTypeAffectedClause(
operation.getVectorLengthDeviceTypeAttr(),
createIntExpr(clause.getIntExpr()), range));
createIntExpr(clause.getIntExpr()), &range));
} else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
llvm_unreachable("vector_length not valid on serial");
} else {
return clauseNotImplemented(clause);
}
}

void VisitAsyncClause(const OpenACCAsyncClause &clause) {
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) {
if (!clause.hasIntExpr()) {
operation.setAsyncOnlyAttr(
handleDeviceTypeAffectedClause(operation.getAsyncOnlyAttr()));
} else {
mlir::MutableOperandRange range = operation.getAsyncOperandsMutable();
operation.setAsyncOperandsDeviceTypeAttr(handleDeviceTypeAffectedClause(
operation.getAsyncOperandsDeviceTypeAttr(),
createIntExpr(clause.getIntExpr()), &range));
}
} else {
return clauseNotImplemented(clause);
}
}

void VisitSelfClause(const OpenACCSelfClause &clause) {
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) {
if (clause.isEmptySelfClause()) {
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Sema/SemaOpenACCClause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,9 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitVectorLengthClause(

OpenACCClause *SemaOpenACCClauseVisitor::VisitAsyncClause(
SemaOpenACC::OpenACCParsedClause &Clause) {
if (DisallowSinceLastDeviceType<OpenACCAsyncClause>(Clause))
return nullptr;

assert(Clause.getNumIntExprs() < 2 &&
"Invalid number of expressions for Async");
return OpenACCAsyncClause::Create(
Expand Down
46 changes: 46 additions & 0 deletions clang/test/CIR/CodeGenOpenACC/kernels.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,5 +210,51 @@ void acc_kernels(int cond) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc

#pragma acc kernels async
{}
// CHECK-NEXT: acc.kernels {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>]}

#pragma acc kernels async(cond)
{}
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.kernels async(%[[CONV_CAST]] : si32) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc

#pragma acc kernels async device_type(nvidia, radeon) async
{}
// CHECK-NEXT: acc.kernels {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]}

#pragma acc kernels async(3) device_type(nvidia, radeon) async(cond)
{}
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.kernels async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc

#pragma acc kernels async device_type(nvidia, radeon) async(cond)
{}
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.kernels async(%[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>]}

#pragma acc kernels async(3) device_type(nvidia, radeon) async
{}
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
// CHECK-NEXT: acc.kernels async(%[[THREE_CAST]] : si32) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<nvidia>, #acc.device_type<radeon>]}

// CHECK-NEXT: cir.return
}
46 changes: 46 additions & 0 deletions clang/test/CIR/CodeGenOpenACC/parallel.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,5 +209,51 @@ void acc_parallel(int cond) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc

#pragma acc parallel async
{}
// CHECK-NEXT: acc.parallel {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>]}

#pragma acc parallel async(cond)
{}
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.parallel async(%[[CONV_CAST]] : si32) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc

#pragma acc parallel async device_type(nvidia, radeon) async
{}
// CHECK-NEXT: acc.parallel {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]}

#pragma acc parallel async(3) device_type(nvidia, radeon) async(cond)
{}
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.parallel async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc

#pragma acc parallel async device_type(nvidia, radeon) async(cond)
{}
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.parallel async(%[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>]}

#pragma acc parallel async(3) device_type(nvidia, radeon) async
{}
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
// CHECK-NEXT: acc.parallel async(%[[THREE_CAST]] : si32) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<nvidia>, #acc.device_type<radeon>]}

// CHECK-NEXT: cir.return
}
46 changes: 46 additions & 0 deletions clang/test/CIR/CodeGenOpenACC/serial.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,51 @@ void acc_serial(int cond) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc

#pragma acc serial async
{}
// CHECK-NEXT: acc.serial {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>]}

#pragma acc serial async(cond)
{}
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.serial async(%[[CONV_CAST]] : si32) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc

#pragma acc serial async device_type(nvidia, radeon) async
{}
// CHECK-NEXT: acc.serial {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]}

#pragma acc serial async(3) device_type(nvidia, radeon) async(cond)
{}
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.serial async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc

#pragma acc serial async device_type(nvidia, radeon) async(cond)
{}
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
// CHECK-NEXT: acc.serial async(%[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>]}

#pragma acc serial async(3) device_type(nvidia, radeon) async
{}
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
// CHECK-NEXT: acc.serial async(%[[THREE_CAST]] : si32) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<nvidia>, #acc.device_type<radeon>]}

// CHECK-NEXT: cir.return
}
42 changes: 42 additions & 0 deletions clang/test/SemaOpenACC/compute-construct-async-clause.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,48 @@ void Test() {
#pragma acc serial async(1, 2)
while(1);

// expected-error@+2{{OpenACC 'async' clause cannot appear more than once on a 'kernels' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc kernels async async
while(1);

// expected-error@+2{{OpenACC 'async' clause cannot appear more than once on a 'kernels' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc kernels async(1) async(2)
while(1);

// expected-error@+2{{OpenACC 'async' clause cannot appear more than once on a 'parallel' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc parallel async(1) async(2)
while(1);

// expected-error@+2{{OpenACC 'async' clause cannot appear more than once on a 'serial' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc serial async(1) async(2)
while(1);

// expected-error@+3{{OpenACC 'async' clause cannot appear more than once in a 'device_type' region on a 'kernels' directive}}
// expected-note@+2{{previous clause is here}}
// expected-note@+1{{previous clause is here}}
#pragma acc kernels async(1) device_type(*) async(1) async(2)
while(1);
// expected-error@+3{{OpenACC 'async' clause cannot appear more than once in a 'device_type' region on a 'parallel' directive}}
// expected-note@+2{{previous clause is here}}
// expected-note@+1{{previous clause is here}}
#pragma acc parallel async device_type(*) async async
while(1);
// expected-error@+3{{OpenACC 'async' clause cannot appear more than once in a 'device_type' region on a 'serial' directive}}
// expected-note@+2{{previous clause is here}}
// expected-note@+1{{previous clause is here}}
#pragma acc serial async(1) device_type(*) async async(2)
while(1);

// expected-error@+3{{OpenACC 'async' clause cannot appear more than once in a 'device_type' region on a 'parallel' directive}}
// expected-note@+2{{previous clause is here}}
// expected-note@+1{{previous clause is here}}
#pragma acc parallel device_type(*) async async
while(1);

struct NotConvertible{} NC;
// expected-error@+1{{OpenACC clause 'async' requires expression of integer type ('struct NotConvertible' invalid)}}
#pragma acc parallel async(NC)
Expand Down
Loading