Skip to content

Commit 26d7348

Browse files
committed
Merge remote-tracking branch 'origin/main' into vplan-add-exit-phi-ops
2 parents 7052337 + 8c83355 commit 26d7348

File tree

97 files changed

+1484
-1362
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+1484
-1362
lines changed

clang/cmake/caches/Fuchsia-stage2.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ set(LLVM_ENABLE_LIBEDIT OFF CACHE BOOL "")
1818
set(LLVM_ENABLE_LLD ON CACHE BOOL "")
1919
set(LLVM_ENABLE_LTO ON CACHE BOOL "")
2020
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
21+
set(LLVM_ENABLE_PIC OFF CACHE BOOL "")
2122
set(LLVM_ENABLE_PLUGINS OFF CACHE BOOL "")
2223
set(LLVM_ENABLE_UNWIND_TABLES OFF CACHE BOOL "")
2324
set(LLVM_ENABLE_Z3_SOLVER OFF CACHE BOOL "")

clang/include/clang/Frontend/CompilerInstance.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -845,18 +845,25 @@ class CompilerInstance : public ModuleLoader {
845845
/// Creates a \c CompilerInstance for compiling a module.
846846
///
847847
/// This expects a properly initialized \c FrontendInputFile.
848+
///
849+
/// Explicitly-specified \c VFS takes precedence over the VFS of this instance
850+
/// when creating the clone and also prevents \c FileManager sharing.
848851
std::unique_ptr<CompilerInstance> cloneForModuleCompileImpl(
849852
SourceLocation ImportLoc, StringRef ModuleName, FrontendInputFile Input,
850-
StringRef OriginalModuleMapFile, StringRef ModuleFileName);
853+
StringRef OriginalModuleMapFile, StringRef ModuleFileName,
854+
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS = nullptr);
851855

852856
public:
853857
/// Creates a new \c CompilerInstance for compiling a module.
854858
///
855859
/// This takes care of creating appropriate \c FrontendInputFile for
856860
/// public/private frameworks, inferred modules and such.
857-
std::unique_ptr<CompilerInstance>
858-
cloneForModuleCompile(SourceLocation ImportLoc, Module *Module,
859-
StringRef ModuleFileName);
861+
///
862+
/// Explicitly-specified \c VFS takes precedence over the VFS of this instance
863+
/// when creating the clone and also prevents \c FileManager sharing.
864+
std::unique_ptr<CompilerInstance> cloneForModuleCompile(
865+
SourceLocation ImportLoc, Module *Module, StringRef ModuleFileName,
866+
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS = nullptr);
860867

861868
/// Compile a module file for the given module, using the options
862869
/// provided by the importing compiler instance. Returns true if the module

clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp

Lines changed: 100 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,27 @@ class OpenACCClauseCIREmitter final
4646
// diagnostics are gone.
4747
SourceLocation dirLoc;
4848

49+
const OpenACCDeviceTypeClause *lastDeviceTypeClause = nullptr;
50+
4951
void clauseNotImplemented(const OpenACCClause &c) {
5052
cgf.cgm.errorNYI(c.getSourceRange(), "OpenACC Clause", c.getClauseKind());
5153
}
5254

55+
mlir::Value createIntExpr(const Expr *intExpr) {
56+
mlir::Value expr = cgf.emitScalarExpr(intExpr);
57+
mlir::Location exprLoc = cgf.cgm.getLoc(intExpr->getBeginLoc());
58+
59+
mlir::IntegerType targetType = mlir::IntegerType::get(
60+
&cgf.getMLIRContext(), cgf.getContext().getIntWidth(intExpr->getType()),
61+
intExpr->getType()->isSignedIntegerOrEnumerationType()
62+
? mlir::IntegerType::SignednessSemantics::Signed
63+
: mlir::IntegerType::SignednessSemantics::Unsigned);
64+
65+
auto conversionOp = builder.create<mlir::UnrealizedConversionCastOp>(
66+
exprLoc, targetType, expr);
67+
return conversionOp.getResult(0);
68+
}
69+
5370
// 'condition' as an OpenACC grammar production is used for 'if' and (some
5471
// variants of) 'self'. It needs to be emitted as a signless-1-bit value, so
5572
// this function emits the expression, then sets the unrealized conversion
@@ -65,6 +82,56 @@ class OpenACCClauseCIREmitter final
6582
return conversionOp.getResult(0);
6683
}
6784

85+
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
86+
// '*' case leaves no identifier-info, just a nullptr.
87+
if (!ii)
88+
return mlir::acc::DeviceType::Star;
89+
return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName())
90+
.CaseLower("default", mlir::acc::DeviceType::Default)
91+
.CaseLower("host", mlir::acc::DeviceType::Host)
92+
.CaseLower("multicore", mlir::acc::DeviceType::Multicore)
93+
.CasesLower("nvidia", "acc_device_nvidia",
94+
mlir::acc::DeviceType::Nvidia)
95+
.CaseLower("radeon", mlir::acc::DeviceType::Radeon);
96+
}
97+
98+
// Handle a clause affected by the 'device-type' to the point that they need
99+
// to have the attributes added in the correct/corresponding order, such as
100+
// 'num_workers' or 'vector_length' on a compute construct.
101+
mlir::ArrayAttr
102+
handleDeviceTypeAffectedClause(mlir::ArrayAttr existingDeviceTypes,
103+
mlir::Value argument,
104+
mlir::MutableOperandRange &argCollection) {
105+
llvm::SmallVector<mlir::Attribute> deviceTypes;
106+
107+
// Collect the 'existing' device-type attributes so we can re-create them
108+
// and insert them.
109+
if (existingDeviceTypes) {
110+
for (const mlir::Attribute &Attr : existingDeviceTypes)
111+
deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
112+
builder.getContext(),
113+
cast<mlir::acc::DeviceTypeAttr>(Attr).getValue()));
114+
}
115+
116+
// Insert 1 version of the 'expr' to the NumWorkers list per-current
117+
// device type.
118+
if (lastDeviceTypeClause) {
119+
for (const DeviceTypeArgument &arch :
120+
lastDeviceTypeClause->getArchitectures()) {
121+
deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
122+
builder.getContext(), decodeDeviceType(arch.getIdentifierInfo())));
123+
argCollection.append(argument);
124+
}
125+
} else {
126+
// Else, we just add a single for 'none'.
127+
deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
128+
builder.getContext(), mlir::acc::DeviceType::None));
129+
argCollection.append(argument);
130+
}
131+
132+
return mlir::ArrayAttr::get(builder.getContext(), deviceTypes);
133+
}
134+
68135
public:
69136
OpenACCClauseCIREmitter(OpTy &operation, CIRGenFunction &cgf,
70137
CIRGenBuilderTy &builder,
@@ -95,31 +162,19 @@ class OpenACCClauseCIREmitter final
95162
}
96163
}
97164

98-
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
99-
// '*' case leaves no identifier-info, just a nullptr.
100-
if (!ii)
101-
return mlir::acc::DeviceType::Star;
102-
return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName())
103-
.CaseLower("default", mlir::acc::DeviceType::Default)
104-
.CaseLower("host", mlir::acc::DeviceType::Host)
105-
.CaseLower("multicore", mlir::acc::DeviceType::Multicore)
106-
.CasesLower("nvidia", "acc_device_nvidia",
107-
mlir::acc::DeviceType::Nvidia)
108-
.CaseLower("radeon", mlir::acc::DeviceType::Radeon);
109-
}
110-
111165
void VisitDeviceTypeClause(const OpenACCDeviceTypeClause &clause) {
166+
lastDeviceTypeClause = &clause;
112167
if constexpr (isOneOfTypes<OpTy, InitOp, ShutdownOp>) {
113168
llvm::SmallVector<mlir::Attribute> deviceTypes;
114169
std::optional<mlir::ArrayAttr> existingDeviceTypes =
115170
operation.getDeviceTypes();
116171

117172
// Ensure we keep the existing ones, and in the correct 'new' order.
118173
if (existingDeviceTypes) {
119-
for (const mlir::Attribute &Attr : *existingDeviceTypes)
174+
for (mlir::Attribute attr : *existingDeviceTypes)
120175
deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
121176
builder.getContext(),
122-
cast<mlir::acc::DeviceTypeAttr>(Attr).getValue()));
177+
cast<mlir::acc::DeviceTypeAttr>(attr).getValue()));
123178
}
124179

125180
for (const DeviceTypeArgument &arg : clause.getArchitectures()) {
@@ -136,6 +191,36 @@ class OpenACCClauseCIREmitter final
136191
if (!clause.getArchitectures().empty())
137192
operation.setDeviceType(
138193
decodeDeviceType(clause.getArchitectures()[0].getIdentifierInfo()));
194+
} else if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) {
195+
// Nothing to do here, these constructs don't have any IR for these, as
196+
// they just modify the other clauses IR. So setting of `lastDeviceType`
197+
// (done above) is all we need.
198+
} else {
199+
return clauseNotImplemented(clause);
200+
}
201+
}
202+
203+
void VisitNumWorkersClause(const OpenACCNumWorkersClause &clause) {
204+
if constexpr (isOneOfTypes<OpTy, ParallelOp, KernelsOp>) {
205+
mlir::MutableOperandRange range = operation.getNumWorkersMutable();
206+
operation.setNumWorkersDeviceTypeAttr(handleDeviceTypeAffectedClause(
207+
operation.getNumWorkersDeviceTypeAttr(),
208+
createIntExpr(clause.getIntExpr()), range));
209+
} else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
210+
llvm_unreachable("num_workers not valid on serial");
211+
} else {
212+
return clauseNotImplemented(clause);
213+
}
214+
}
215+
216+
void VisitVectorLengthClause(const OpenACCVectorLengthClause &clause) {
217+
if constexpr (isOneOfTypes<OpTy, ParallelOp, KernelsOp>) {
218+
mlir::MutableOperandRange range = operation.getVectorLengthMutable();
219+
operation.setVectorLengthDeviceTypeAttr(handleDeviceTypeAffectedClause(
220+
operation.getVectorLengthDeviceTypeAttr(),
221+
createIntExpr(clause.getIntExpr()), range));
222+
} else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
223+
llvm_unreachable("vector_length not valid on serial");
139224
} else {
140225
return clauseNotImplemented(clause);
141226
}

clang/lib/Frontend/CompilerInstance.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,7 +1152,8 @@ static Language getLanguageFromOptions(const LangOptions &LangOpts) {
11521152

11531153
std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompileImpl(
11541154
SourceLocation ImportLoc, StringRef ModuleName, FrontendInputFile Input,
1155-
StringRef OriginalModuleMapFile, StringRef ModuleFileName) {
1155+
StringRef OriginalModuleMapFile, StringRef ModuleFileName,
1156+
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
11561157
// Construct a compiler invocation for creating this module.
11571158
auto Invocation = std::make_shared<CompilerInvocation>(getInvocation());
11581159

@@ -1212,19 +1213,21 @@ std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompileImpl(
12121213
auto &Inv = *Invocation;
12131214
Instance.setInvocation(std::move(Invocation));
12141215

1216+
if (VFS) {
1217+
Instance.createFileManager(std::move(VFS));
1218+
} else if (FrontendOpts.ModulesShareFileManager) {
1219+
Instance.setFileManager(&getFileManager());
1220+
} else {
1221+
Instance.createFileManager(&getVirtualFileSystem());
1222+
}
1223+
12151224
Instance.createDiagnostics(
1216-
getVirtualFileSystem(),
1225+
Instance.getVirtualFileSystem(),
12171226
new ForwardingDiagnosticConsumer(getDiagnosticClient()),
12181227
/*ShouldOwnClient=*/true);
1219-
12201228
if (llvm::is_contained(DiagOpts.SystemHeaderWarningsModules, ModuleName))
12211229
Instance.getDiagnostics().setSuppressSystemWarnings(false);
12221230

1223-
if (FrontendOpts.ModulesShareFileManager) {
1224-
Instance.setFileManager(&getFileManager());
1225-
} else {
1226-
Instance.createFileManager(&getVirtualFileSystem());
1227-
}
12281231
Instance.createSourceManager(Instance.getFileManager());
12291232
SourceManager &SourceMgr = Instance.getSourceManager();
12301233

@@ -1318,7 +1321,8 @@ static OptionalFileEntryRef getPublicModuleMap(FileEntryRef File,
13181321
}
13191322

13201323
std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompile(
1321-
SourceLocation ImportLoc, Module *Module, StringRef ModuleFileName) {
1324+
SourceLocation ImportLoc, Module *Module, StringRef ModuleFileName,
1325+
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
13221326
StringRef ModuleName = Module->getTopLevelModuleName();
13231327

13241328
InputKind IK(getLanguageFromOptions(getLangOpts()), InputKind::ModuleMap);
@@ -1363,7 +1367,8 @@ std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompile(
13631367
return cloneForModuleCompileImpl(
13641368
ImportLoc, ModuleName,
13651369
FrontendInputFile(ModuleMapFilePath, IK, IsSystem),
1366-
ModMap.getModuleMapFileForUniquing(Module)->getName(), ModuleFileName);
1370+
ModMap.getModuleMapFileForUniquing(Module)->getName(), ModuleFileName,
1371+
std::move(VFS));
13671372
}
13681373

13691374
// FIXME: We only need to fake up an input file here as a way of
@@ -1380,7 +1385,8 @@ std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompile(
13801385
auto Instance = cloneForModuleCompileImpl(
13811386
ImportLoc, ModuleName,
13821387
FrontendInputFile(FakeModuleMapFile, IK, +Module->IsSystem),
1383-
ModMap.getModuleMapFileForUniquing(Module)->getName(), ModuleFileName);
1388+
ModMap.getModuleMapFileForUniquing(Module)->getName(), ModuleFileName,
1389+
std::move(VFS));
13841390

13851391
std::unique_ptr<llvm::MemoryBuffer> ModuleMapBuffer =
13861392
llvm::MemoryBuffer::getMemBufferCopy(InferredModuleMapContent);

clang/test/CIR/CodeGenOpenACC/kernels.c

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,5 +106,109 @@ void acc_kernels(int cond) {
106106
// CHECK-NEXT: acc.terminator
107107
// CHECK-NEXT: } loc
108108

109+
#pragma acc kernels num_workers(cond)
110+
{}
111+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
112+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
113+
// CHECK-NEXT: acc.kernels num_workers(%[[CONV_CAST]] : si32) {
114+
// CHECK-NEXT: acc.terminator
115+
// CHECK-NEXT: } loc
116+
117+
#pragma acc kernels num_workers(cond) device_type(nvidia) num_workers(2u)
118+
{}
119+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
120+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
121+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
122+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
123+
// CHECK-NEXT: acc.kernels num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
124+
// CHECK-NEXT: acc.terminator
125+
// CHECK-NEXT: } loc
126+
127+
#pragma acc kernels num_workers(cond) device_type(nvidia, host) num_workers(2) device_type(radeon) num_workers(3)
128+
{}
129+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
130+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
131+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
132+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
133+
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
134+
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
135+
// CHECK-NEXT: acc.kernels num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
136+
// CHECK-NEXT: acc.terminator
137+
// CHECK-NEXT: } loc
138+
139+
#pragma acc kernels num_workers(cond) device_type(nvidia) num_workers(2) device_type(radeon, multicore) num_workers(3)
140+
{}
141+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
142+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
143+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
144+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
145+
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
146+
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
147+
// CHECK-NEXT: acc.kernels num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>], %[[THREE_CAST]] : si32 [#acc.device_type<multicore>]) {
148+
// CHECK-NEXT: acc.terminator
149+
// CHECK-NEXT: } loc
150+
151+
#pragma acc kernels device_type(nvidia) num_workers(2) device_type(radeon) num_workers(3)
152+
{}
153+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
154+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
155+
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
156+
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
157+
// CHECK-NEXT: acc.kernels num_workers(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
158+
// CHECK-NEXT: acc.terminator
159+
// CHECK-NEXT: } loc
160+
161+
#pragma acc kernels vector_length(cond)
162+
{}
163+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
164+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
165+
// CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32) {
166+
// CHECK-NEXT: acc.terminator
167+
// CHECK-NEXT: } loc
168+
169+
#pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2u)
170+
{}
171+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
172+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
173+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
174+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
175+
// CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
176+
// CHECK-NEXT: acc.terminator
177+
// CHECK-NEXT: } loc
178+
179+
#pragma acc kernels vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3)
180+
{}
181+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
182+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
183+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
184+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
185+
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
186+
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
187+
// CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
188+
// CHECK-NEXT: acc.terminator
189+
// CHECK-NEXT: } loc
190+
191+
#pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(3)
192+
{}
193+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
194+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
195+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
196+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
197+
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
198+
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
199+
// CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>], %[[THREE_CAST]] : si32 [#acc.device_type<multicore>]) {
200+
// CHECK-NEXT: acc.terminator
201+
// CHECK-NEXT: } loc
202+
203+
#pragma acc kernels device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3)
204+
{}
205+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
206+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
207+
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
208+
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
209+
// CHECK-NEXT: acc.kernels vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
210+
// CHECK-NEXT: acc.terminator
211+
// CHECK-NEXT: } loc
212+
109213
// CHECK-NEXT: cir.return
110214
}

0 commit comments

Comments
 (0)