From 5b7a2c9e4aa052c316c07877dc531899e0b0d053 Mon Sep 17 00:00:00 2001 From: erichkeane Date: Thu, 8 May 2025 09:50:01 -0700 Subject: [PATCH] [OpenACC][CIR] Impl default/seq lowering for combined constructs This adds two clauses plus the infrastructure for emitting the clauses on combined constructs. Combined constructs require two operations, so this makes sure we emit on the 'correct' one. It DOES require that the combined construct handling picks the correct one to put it on, AND sets up the 'inserter' correctly, but these two clauses don't require an inserter, so a future patch will get those. --- clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 51 ++++++++++++++++-- clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp | 9 ++++ clang/test/CIR/CodeGenOpenACC/combined.cpp | 53 +++++++++++++++++++ .../openacc-not-implemented.cpp | 8 ++- 4 files changed, 115 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index 686bd32217466..e3a69ba8282f7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -23,9 +23,25 @@ constexpr bool isOneOfTypes = template constexpr bool isOneOfTypes = std::is_same_v; +// Holds information for emitting clauses for a combined construct. We +// instantiate the clause emitter with this type so that it can use +// if-constexpr to specially handle these. +template struct CombinedConstructClauseInfo { + using ComputeOpTy = CompOpTy; + ComputeOpTy computeOp; + mlir::acc::LoopOp loopOp; +}; + +template constexpr bool isCombinedType = false; +template +constexpr bool isCombinedType> = true; + template class OpenACCClauseCIREmitter final : public OpenACCClauseVisitor> { + // Necessary for combined constructs. + template friend class OpenACCClauseCIREmitter; + OpTy &operation; CIRGen::CIRGenFunction &cgf; CIRGen::CIRGenBuilderTy &builder; @@ -119,6 +135,26 @@ class OpenACCClauseCIREmitter final llvm_unreachable("unknown gang kind"); } + template , U>> + void applyToLoopOp(const OpenACCClause &c) { + // TODO OpenACC: we have to set the insertion scope here correctly still. + OpenACCClauseCIREmitter loopEmitter{ + operation.loopOp, cgf, builder, dirKind, dirLoc}; + loopEmitter.lastDeviceTypeValues = lastDeviceTypeValues; + loopEmitter.Visit(&c); + } + + template , U>> + void applyToComputeOp(const OpenACCClause &c) { + // TODO OpenACC: we have to set the insertion scope here correctly still. + OpenACCClauseCIREmitter computeEmitter{ + operation.computeOp, cgf, builder, dirKind, dirLoc}; + computeEmitter.lastDeviceTypeValues = lastDeviceTypeValues; + computeEmitter.Visit(&c); + } + public: OpenACCClauseCIREmitter(OpTy &operation, CIRGen::CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder, @@ -145,10 +181,10 @@ class OpenACCClauseCIREmitter final case OpenACCDefaultClauseKind::Invalid: break; } + } else if constexpr (isCombinedType) { + applyToComputeOp(clause); } else { - // TODO: When we've implemented this for everything, switch this to an - // unreachable. Combined constructs remain. - return clauseNotImplemented(clause); + llvm_unreachable("Unknown construct kind in VisitDefaultClause"); } } @@ -175,9 +211,12 @@ class OpenACCClauseCIREmitter final // Nothing to do here, these constructs don't have any IR for these, as // they just modify the other clauses IR. So setting of // `lastDeviceTypeValues` (done above) is all we need. + } else if constexpr (isCombinedType) { + // Nothing to do here either, combined constructs are just going to use + // 'lastDeviceTypeValues' to set the value for the child visitor. } else { // TODO: When we've implemented this for everything, switch this to an - // unreachable. update, data, routine, combined constructs remain. + // unreachable. update, data, routine constructs remain. return clauseNotImplemented(clause); } } @@ -334,9 +373,11 @@ class OpenACCClauseCIREmitter final void VisitSeqClause(const OpenACCSeqClause &clause) { if constexpr (isOneOfTypes) { operation.addSeq(builder.getContext(), lastDeviceTypeValues); + } else if constexpr (isCombinedType) { + applyToLoopOp(clause); } else { // TODO: When we've implemented this for everything, switch this to an - // unreachable. Routine, Combined constructs remain. + // unreachable. Routine construct remains. return clauseNotImplemented(clause); } } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp index cc2470b395cd5..fc76f57ce7c29 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp @@ -109,6 +109,15 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct( builder.create(end); } + { + mlir::OpBuilder::InsertionGuard guardCase(builder); + CombinedConstructClauseInfo inf{computeOp, loopOp}; + // We don't bother setting the insertion point, since the clause emitter + // is going to have to do this correctly. + makeClauseEmitter(inf, *this, builder, dirKind, dirLoc) + .VisitClauseList(clauses); + } + builder.create(end); } diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index 4ea192cdcc9f0..13f623c42665d 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -22,13 +22,66 @@ extern "C" void acc_combined(int N) { // CHECK-NEXT: } loc // CHECK: acc.yield // CHECK-NEXT: } loc + #pragma acc kernels loop for(unsigned I = 0; I < N; ++I); + // CHECK: acc.kernels combined(loop) { + // CHECK: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK: acc.terminator + // CHECK-NEXT: } loc + +#pragma acc parallel loop default(none) + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.parallel combined(loop) { + // CHECK: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK: acc.yield + // CHECK-NEXT: } attributes {defaultAttr = #acc} loc + +#pragma acc serial loop default(present) + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.serial combined(loop) { + // CHECK: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK: acc.yield + // CHECK-NEXT: } attributes {defaultAttr = #acc} loc +#pragma acc kernels loop default(none) + for(unsigned I = 0; I < N; ++I); // CHECK: acc.kernels combined(loop) { // CHECK: acc.loop combined(kernels) { // CHECK: acc.yield // CHECK-NEXT: } loc // CHECK: acc.terminator + // CHECK-NEXT: } attributes {defaultAttr = #acc} loc + +#pragma acc parallel loop seq + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.parallel combined(loop) { + // CHECK: acc.loop combined(parallel) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {seq = [#acc.device_type]} loc + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop device_type(nvidia, radeon) seq + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.serial combined(loop) { + // CHECK: acc.loop combined(serial) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {seq = [#acc.device_type, #acc.device_type]} loc + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc kernels loop seq device_type(nvidia, radeon) + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.kernels combined(loop) { + // CHECK: acc.loop combined(kernels) { + // CHECK: acc.yield + // CHECK-NEXT: } attributes {seq = [#acc.device_type]} loc + // CHECK: acc.terminator // CHECK-NEXT: } loc + } diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp index c560ab32aac31..b3299c0b4c137 100644 --- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp +++ b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-cir %s -o %t.cir -verify -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-llvm %s -o %t-cir.ll -verify void HelloWorld(int *A, int *B, int *C, int N) { @@ -10,4 +9,11 @@ void HelloWorld(int *A, int *B, int *C, int N) { // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Declare Construct}} #pragma acc declare create(A) + + // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: private}} +#pragma acc parallel loop private(A) + for(int i = 0; i <5; ++i); + // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: async}} +#pragma acc parallel loop async + for(int i = 0; i <5; ++i); }