Skip to content

Commit 5c8bcf7

Browse files
authored
[flang][cuda][NFC] Move CUDA intrinsics lowering to a separate file (#166461)
Just move all CUDA-related intrinsics lowering to a separate file to avoid cluttering the main Fortran intrinsic file.
1 parent d998f92 commit 5c8bcf7

File tree

5 files changed

+1691
-1403
lines changed

5 files changed

+1691
-1403
lines changed
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
//==-- Builder/CUDAIntrinsicCall.h - lowering of CUDA intrinsics ---*-C++-*-==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef FORTRAN_LOWER_CUDAINTRINSICCALL_H
10+
#define FORTRAN_LOWER_CUDAINTRINSICCALL_H
11+
12+
#include "flang/Optimizer/Builder/IntrinsicCall.h"
13+
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
14+
15+
namespace fir {
16+
17+
/// Lowering entry points for CUDA intrinsics, layered on top of the generic
/// IntrinsicLibrary that this type derives from. This header only declares
/// the handlers; none of them is defined here.
struct CUDAIntrinsicLibrary : IntrinsicLibrary {
18+
19+
// Constructors. An instance is built from a builder and a location, both of
// which are forwarded unchanged to the IntrinsicLibrary base. Default
// construction and copy construction are explicitly deleted.
20+
explicit CUDAIntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc)
21+
: IntrinsicLibrary(builder, loc) {}
22+
CUDAIntrinsicLibrary() = delete;
23+
CUDAIntrinsicLibrary(const CUDAIntrinsicLibrary &) = delete;
24+
25+
// CUDA intrinsic handlers. Three signature shapes are used below:
//   - mlir::Value (mlir::Type, llvm::ArrayRef<mlir::Value>)
//   - fir::ExtendedValue (mlir::Type, llvm::ArrayRef<fir::ExtendedValue>)
//   - void (llvm::ArrayRef<fir::ExtendedValue>) for handlers returning nothing
// NOTE(review): the leading mlir::Type is presumably the result type (sibling
// declarations in IntrinsicLibrary name that parameter `resultType`) --
// confirm against the definitions.
26+
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
27+
fir::ExtendedValue genAtomicAddR2(mlir::Type,
28+
llvm::ArrayRef<fir::ExtendedValue>);
29+
// `extent` is a compile-time integer chosen by whoever instantiates the
// handler. NOTE(review): presumably the number of vector elements -- confirm.
template <int extent>
30+
fir::ExtendedValue genAtomicAddVector(mlir::Type,
31+
llvm::ArrayRef<fir::ExtendedValue>);
32+
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
33+
fir::ExtendedValue genAtomicCas(mlir::Type,
34+
llvm::ArrayRef<fir::ExtendedValue>);
35+
mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
36+
fir::ExtendedValue genAtomicExch(mlir::Type,
37+
llvm::ArrayRef<fir::ExtendedValue>);
38+
mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
39+
mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
40+
mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
41+
mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
42+
mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef<mlir::Value>);
43+
fir::ExtendedValue genAtomicXor(mlir::Type,
44+
llvm::ArrayRef<fir::ExtendedValue>);
45+
mlir::Value genBarrierArrive(mlir::Type, llvm::ArrayRef<mlir::Value>);
46+
mlir::Value genBarrierArriveCnt(mlir::Type, llvm::ArrayRef<mlir::Value>);
47+
void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>);
48+
mlir::Value genBarrierTryWait(mlir::Type, llvm::ArrayRef<mlir::Value>);
49+
mlir::Value genBarrierTryWaitSleep(mlir::Type, llvm::ArrayRef<mlir::Value>);
50+
void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
51+
// Parameterized on a `const char *` non-type template argument (`fctName`)
// and a compile-time integer (`extent`); both are fixed at instantiation.
template <const char *fctName, int extent>
52+
fir::ExtendedValue genLDXXFunc(mlir::Type,
53+
llvm::ArrayRef<fir::ExtendedValue>);
54+
mlir::Value genMatchAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
55+
mlir::Value genMatchAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
56+
// Parameterized on an operation type (`OpTy`) supplied at instantiation.
template <typename OpTy>
57+
mlir::Value genNVVMTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
58+
void genSyncThreads(llvm::ArrayRef<fir::ExtendedValue>);
59+
mlir::Value genSyncThreadsAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
60+
mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
61+
mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
62+
void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue>);
63+
mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
64+
mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
65+
mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
66+
void genThreadFence(llvm::ArrayRef<fir::ExtendedValue>);
67+
void genThreadFenceBlock(llvm::ArrayRef<fir::ExtendedValue>);
68+
void genThreadFenceSystem(llvm::ArrayRef<fir::ExtendedValue>);
69+
void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
70+
void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
71+
void genTMABulkLoadC4(llvm::ArrayRef<fir::ExtendedValue>);
72+
void genTMABulkLoadC8(llvm::ArrayRef<fir::ExtendedValue>);
73+
void genTMABulkLoadI4(llvm::ArrayRef<fir::ExtendedValue>);
74+
void genTMABulkLoadI8(llvm::ArrayRef<fir::ExtendedValue>);
75+
void genTMABulkLoadR2(llvm::ArrayRef<fir::ExtendedValue>);
76+
void genTMABulkLoadR4(llvm::ArrayRef<fir::ExtendedValue>);
77+
void genTMABulkLoadR8(llvm::ArrayRef<fir::ExtendedValue>);
78+
void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
79+
void genTMABulkStoreC4(llvm::ArrayRef<fir::ExtendedValue>);
80+
void genTMABulkStoreC8(llvm::ArrayRef<fir::ExtendedValue>);
81+
void genTMABulkStoreI4(llvm::ArrayRef<fir::ExtendedValue>);
82+
void genTMABulkStoreI8(llvm::ArrayRef<fir::ExtendedValue>);
83+
void genTMABulkStoreR2(llvm::ArrayRef<fir::ExtendedValue>);
84+
void genTMABulkStoreR4(llvm::ArrayRef<fir::ExtendedValue>);
85+
void genTMABulkStoreR8(llvm::ArrayRef<fir::ExtendedValue>);
86+
void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
87+
// Parameterized on an mlir::NVVM::VoteSyncKind value fixed at instantiation.
template <mlir::NVVM::VoteSyncKind kind>
88+
mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
89+
};
90+
91+
/// Finds the IntrinsicHandler for the CUDA intrinsic called \p name.
/// NOTE(review): the result for an unknown name is presumably a null pointer
/// -- confirm against the definition, which is not part of this header.
const IntrinsicHandler *findCUDAIntrinsicHandler(llvm::StringRef name);
92+
93+
} // namespace fir
94+
95+
#endif // FORTRAN_LOWER_CUDAINTRINSICCALL_H

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "flang/Runtime/iostat-consts.h"
2020
#include "mlir/Dialect/Complex/IR/Complex.h"
2121
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
22-
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
2322
#include "mlir/Dialect/Math/IR/Math.h"
2423
#include <optional>
2524

@@ -187,37 +186,13 @@ struct IntrinsicLibrary {
187186
mlir::Value genAnint(mlir::Type, llvm::ArrayRef<mlir::Value>);
188187
fir::ExtendedValue genAny(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
189188
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
190-
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
191-
fir::ExtendedValue genAtomicAddR2(mlir::Type,
192-
llvm::ArrayRef<fir::ExtendedValue>);
193-
template <int extent>
194-
fir::ExtendedValue genAtomicAddVector(mlir::Type,
195-
llvm::ArrayRef<fir::ExtendedValue>);
196-
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
197-
fir::ExtendedValue genAtomicCas(mlir::Type,
198-
llvm::ArrayRef<fir::ExtendedValue>);
199-
mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
200-
fir::ExtendedValue genAtomicExch(mlir::Type,
201-
llvm::ArrayRef<fir::ExtendedValue>);
202-
mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
203-
mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
204-
mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
205-
mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
206-
mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef<mlir::Value>);
207-
fir::ExtendedValue genAtomicXor(mlir::Type,
208-
llvm::ArrayRef<fir::ExtendedValue>);
209189
fir::ExtendedValue
210190
genCommandArgumentCount(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
211191
mlir::Value genAsind(mlir::Type, llvm::ArrayRef<mlir::Value>);
212192
mlir::Value genAsinpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
213193
fir::ExtendedValue genAssociated(mlir::Type,
214194
llvm::ArrayRef<fir::ExtendedValue>);
215195
mlir::Value genAtand(mlir::Type, llvm::ArrayRef<mlir::Value>);
216-
mlir::Value genBarrierArrive(mlir::Type, llvm::ArrayRef<mlir::Value>);
217-
mlir::Value genBarrierArriveCnt(mlir::Type, llvm::ArrayRef<mlir::Value>);
218-
void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>);
219-
mlir::Value genBarrierTryWait(mlir::Type, llvm::ArrayRef<mlir::Value>);
220-
mlir::Value genBarrierTryWaitSleep(mlir::Type, llvm::ArrayRef<mlir::Value>);
221196
fir::ExtendedValue genBesselJn(mlir::Type,
222197
llvm::ArrayRef<fir::ExtendedValue>);
223198
fir::ExtendedValue genBesselYn(mlir::Type,
@@ -239,9 +214,6 @@ struct IntrinsicLibrary {
239214
fir::ExtendedValue genCount(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
240215
void genCpuTime(llvm::ArrayRef<fir::ExtendedValue>);
241216
fir::ExtendedValue genCshift(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
242-
template <const char *fctName, int extent>
243-
fir::ExtendedValue genCUDALDXXFunc(mlir::Type,
244-
llvm::ArrayRef<fir::ExtendedValue>);
245217
fir::ExtendedValue genCAssociatedCFunPtr(mlir::Type,
246218
llvm::ArrayRef<fir::ExtendedValue>);
247219
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
@@ -281,7 +253,6 @@ struct IntrinsicLibrary {
281253
llvm::ArrayRef<fir::ExtendedValue>);
282254
template <Extremum, ExtremumBehavior>
283255
mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>);
284-
void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
285256
mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>);
286257
mlir::Value genFraction(mlir::Type resultType,
287258
mlir::ArrayRef<mlir::Value> args);
@@ -373,8 +344,6 @@ struct IntrinsicLibrary {
373344
mlir::Value genMalloc(mlir::Type, llvm::ArrayRef<mlir::Value>);
374345
template <typename Shift>
375346
mlir::Value genMask(mlir::Type, llvm::ArrayRef<mlir::Value>);
376-
mlir::Value genMatchAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
377-
mlir::Value genMatchAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
378347
fir::ExtendedValue genMatmul(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
379348
fir::ExtendedValue genMatmulTranspose(mlir::Type,
380349
llvm::ArrayRef<fir::ExtendedValue>);
@@ -397,8 +366,6 @@ struct IntrinsicLibrary {
397366
fir::ExtendedValue genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
398367
fir::ExtendedValue genNumImages(mlir::Type,
399368
llvm::ArrayRef<fir::ExtendedValue>);
400-
template <typename OpTy>
401-
mlir::Value genNVVMTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
402369
fir::ExtendedValue genPack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
403370
fir::ExtendedValue genParity(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
404371
void genPerror(llvm::ArrayRef<fir::ExtendedValue>);
@@ -453,56 +420,25 @@ struct IntrinsicLibrary {
453420
fir::ExtendedValue genSum(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
454421
void genSignalSubroutine(llvm::ArrayRef<fir::ExtendedValue>);
455422
void genSleep(llvm::ArrayRef<fir::ExtendedValue>);
456-
void genSyncThreads(llvm::ArrayRef<fir::ExtendedValue>);
457-
mlir::Value genSyncThreadsAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
458-
mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
459-
mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
460-
void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue>);
461423
fir::ExtendedValue genSystem(std::optional<mlir::Type>,
462424
mlir::ArrayRef<fir::ExtendedValue> args);
463425
void genSystemClock(llvm::ArrayRef<fir::ExtendedValue>);
464426
mlir::Value genTand(mlir::Type, llvm::ArrayRef<mlir::Value>);
465427
mlir::Value genTanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
466428
mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
467-
void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
468-
void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
469-
void genTMABulkLoadC4(llvm::ArrayRef<fir::ExtendedValue>);
470-
void genTMABulkLoadC8(llvm::ArrayRef<fir::ExtendedValue>);
471-
void genTMABulkLoadI4(llvm::ArrayRef<fir::ExtendedValue>);
472-
void genTMABulkLoadI8(llvm::ArrayRef<fir::ExtendedValue>);
473-
void genTMABulkLoadR2(llvm::ArrayRef<fir::ExtendedValue>);
474-
void genTMABulkLoadR4(llvm::ArrayRef<fir::ExtendedValue>);
475-
void genTMABulkLoadR8(llvm::ArrayRef<fir::ExtendedValue>);
476-
void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
477-
void genTMABulkStoreI4(llvm::ArrayRef<fir::ExtendedValue>);
478-
void genTMABulkStoreI8(llvm::ArrayRef<fir::ExtendedValue>);
479-
void genTMABulkStoreR2(llvm::ArrayRef<fir::ExtendedValue>);
480-
void genTMABulkStoreR4(llvm::ArrayRef<fir::ExtendedValue>);
481-
void genTMABulkStoreR8(llvm::ArrayRef<fir::ExtendedValue>);
482-
void genTMABulkStoreC4(llvm::ArrayRef<fir::ExtendedValue>);
483-
void genTMABulkStoreC8(llvm::ArrayRef<fir::ExtendedValue>);
484-
void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
485429
mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>);
486430
fir::ExtendedValue genTransfer(mlir::Type,
487431
llvm::ArrayRef<fir::ExtendedValue>);
488432
fir::ExtendedValue genTranspose(mlir::Type,
489433
llvm::ArrayRef<fir::ExtendedValue>);
490-
mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
491434
fir::ExtendedValue genThisImage(mlir::Type,
492435
llvm::ArrayRef<fir::ExtendedValue>);
493-
mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
494-
mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
495-
void genThreadFence(llvm::ArrayRef<fir::ExtendedValue>);
496-
void genThreadFenceBlock(llvm::ArrayRef<fir::ExtendedValue>);
497-
void genThreadFenceSystem(llvm::ArrayRef<fir::ExtendedValue>);
498436
fir::ExtendedValue genTrim(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
499437
fir::ExtendedValue genUbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
500438
fir::ExtendedValue genUnlink(std::optional<mlir::Type> resultType,
501439
llvm::ArrayRef<fir::ExtendedValue> args);
502440
fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
503441
fir::ExtendedValue genVerify(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
504-
template <mlir::NVVM::VoteSyncKind kind>
505-
mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
506442

507443
/// Implement all conversion functions like DBLE, the first argument is
508444
/// the value to convert. There may be an additional KIND arguments that

flang/lib/Optimizer/Builder/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_flang_library(FIRBuilder
55
BoxValue.cpp
66
Character.cpp
77
Complex.cpp
8+
CUDAIntrinsicCall.cpp
89
CUFCommon.cpp
910
DoLoopHelper.cpp
1011
FIRBuilder.cpp

0 commit comments

Comments
 (0)