Skip to content

Commit 1f1ca6c

Browse files
committed
Relocate OpenMPOpt.cpp's isKernelCC to Function::hasKernelCallingConv and use it in KernelInfo
Also, regenerate OpenMP tests from current clang so they see the new kernel calling conventions.
1 parent b9447c0 commit 1f1ca6c

File tree

7 files changed

+430
-426
lines changed

7 files changed

+430
-426
lines changed

llvm/include/llvm/IR/Function.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,18 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
284284
setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4));
285285
}
286286

287+
/// Return true if this function's calling convention is one of the kernel
/// calling conventions (PTX_Kernel, AMDGPU_KERNEL, or SPIR_KERNEL), and false
/// for every other calling convention.
288+
bool hasKernelCallingConv() const {
289+
switch (getCallingConv()) {
290+
default:
291+
return false;
292+
case CallingConv::PTX_Kernel:
293+
case CallingConv::AMDGPU_KERNEL:
294+
case CallingConv::SPIR_KERNEL:
295+
return true;
296+
}
297+
}
298+
287299
enum ProfileCountType { PCT_Real, PCT_Synthetic };
288300

289301
/// Class to represent profile counts.

llvm/lib/Analysis/KernelInfo.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,6 @@ class KernelInfo {
7878

7979
} // end anonymous namespace
8080

81-
static bool isKernelFunction(Function &F) {
82-
// TODO: Is this general enough? Consider languages beyond OpenMP.
83-
return F.hasFnAttribute("kernel");
84-
}
85-
8681
static void identifyCallee(OptimizationRemark &R, const Module *M,
8782
const Value *V, StringRef Kind = "") {
8883
SmallString<100> Name; // might be function name or asm expression
@@ -292,7 +287,7 @@ void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
292287
KI.FlatAddrspace = TheTTI.getFlatAddressSpace();
293288

294289
// Record function properties.
295-
KI.ExternalNotKernel = F.hasExternalLinkage() && !isKernelFunction(F);
290+
KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv();
296291
for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) {
297292
if (auto Val = parseFnAttrAsInteger(F, Name))
298293
KI.LaunchBounds.push_back({Name, *Val});

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5905,17 +5905,6 @@ bool llvm::omp::isOpenMPKernel(Function &Fn) {
59055905
return Fn.hasFnAttribute("kernel");
59065906
}
59075907

5908-
static bool isKernelCC(Function &F) {
5909-
switch (F.getCallingConv()) {
5910-
default:
5911-
return false;
5912-
case CallingConv::PTX_Kernel:
5913-
case CallingConv::AMDGPU_KERNEL:
5914-
case CallingConv::SPIR_KERNEL:
5915-
return true;
5916-
}
5917-
}
5918-
59195908
KernelSet llvm::omp::getDeviceKernels(Module &M) {
59205909
// TODO: Create a more cross-platform way of determining device kernels.
59215910
KernelSet Kernels;
@@ -5948,7 +5937,7 @@ KernelSet llvm::omp::getDeviceKernels(Module &M) {
59485937
}
59495938

59505939
for (Function &F : M)
5951-
if (isKernelCC(F))
5940+
if (F.hasKernelCallingConv())
59525941
ProcessKernel(F);
59535942

59545943
return Kernels;

llvm/test/Analysis/KernelInfo/linkage.ll

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,47 @@
77
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
88
target triple = "nvptx64-nvidia-cuda"
99

10-
; CHECK: remark: test.c:3:0: in function 'f', ExternalNotKernel = 1
11-
define external void @f() !dbg !10 {
10+
; CHECK: remark: test.c:13:0: in artificial function 'extNotKer', ExternalNotKernel = 1
11+
define external void @extNotKer() !dbg !10 {
1212
entry:
1313
ret void
1414
}
1515

16-
; CHECK: remark: test.c:13:0: in artificial function 'g', ExternalNotKernel = 1
17-
define void @g() !dbg !20 {
16+
; CHECK: remark: test.c:23:0: in function 'impNotKer', ExternalNotKernel = 1
17+
define void @impNotKer() !dbg !20 {
1818
entry:
1919
ret void
2020
}
2121

22-
; CHECK: remark: test.c:23:0: in function 'h', ExternalNotKernel = 0
23-
define external void @h() #0 !dbg !30 {
22+
; CHECK: remark: test.c:33:0: in artificial function 'weakNotKer', ExternalNotKernel = 0
23+
define weak void @weakNotKer() !dbg !30 {
2424
entry:
2525
ret void
2626
}
2727

28-
; CHECK: remark: test.c:33:0: in artificial function 'i', ExternalNotKernel = 0
29-
define weak void @i() !dbg !40 {
28+
; CHECK: remark: test.c:43:0: in function 'extPtxKer', ExternalNotKernel = 0
29+
define external ptx_kernel void @extPtxKer() !dbg !40 {
3030
entry:
3131
ret void
3232
}
3333

34-
attributes #0 = { "kernel" }
34+
; CHECK: remark: test.c:53:0: in artificial function 'extAmdgpuKer', ExternalNotKernel = 0
35+
define external amdgpu_kernel void @extAmdgpuKer() !dbg !50 {
36+
entry:
37+
ret void
38+
}
39+
40+
; CHECK: remark: test.c:63:0: in function 'extSpirKer', ExternalNotKernel = 0
41+
define external spir_kernel void @extSpirKer() !dbg !60 {
42+
entry:
43+
ret void
44+
}
45+
46+
; CHECK: remark: test.c:73:0: in artificial function 'weakKer', ExternalNotKernel = 0
47+
define weak ptx_kernel void @weakKer() !dbg !70 {
48+
entry:
49+
ret void
50+
}
3551

3652
!llvm.module.flags = !{!0}
3753
!llvm.dbg.cu = !{!1}
@@ -41,11 +57,12 @@ attributes #0 = { "kernel" }
4157
!2 = !DIFile(filename: "test.c", directory: "/tmp")
4258
!3 = !{null}
4359
!4 = !{}
44-
!10 = distinct !DISubprogram(name: "f", scope: !2, file: !2, line: 3, type: !11, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)
45-
!11 = !DISubroutineType(types: !3)
46-
!20 = distinct !DISubprogram(name: "g", scope: !2, file: !2, line: 13, type: !21, scopeLine: 13, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !1, retainedNodes: !4)
47-
!21 = distinct !DISubroutineType(types: !3)
48-
!30 = distinct !DISubprogram(name: "h", scope: !2, file: !2, line: 23, type: !31, scopeLine: 23, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)
49-
!31 = distinct !DISubroutineType(types: !3)
50-
!40 = distinct !DISubprogram(name: "i", scope: !2, file: !2, line: 33, type: !41, scopeLine: 33, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !1, retainedNodes: !4)
51-
!41 = distinct !DISubroutineType(types: !3)
60+
!5 = !DISubroutineType(types: !3)
61+
62+
!10 = distinct !DISubprogram(name: "extNotKer", scope: !2, file: !2, line: 13, type: !5, scopeLine: 13, flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)
63+
!20 = distinct !DISubprogram(name: "impNotKer", scope: !2, file: !2, line: 23, type: !5, scopeLine: 23, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)
64+
!30 = distinct !DISubprogram(name: "weakNotKer", scope: !2, file: !2, line: 33, type: !5, scopeLine: 33, flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)
65+
!40 = distinct !DISubprogram(name: "extPtxKer", scope: !2, file: !2, line: 43, type: !5, scopeLine: 43, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)
66+
!50 = distinct !DISubprogram(name: "extAmdgpuKer", scope: !2, file: !2, line: 53, type: !5, scopeLine: 53, flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)
67+
!60 = distinct !DISubprogram(name: "extSpirKer", scope: !2, file: !2, line: 63, type: !5, scopeLine: 63, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)
68+
!70 = distinct !DISubprogram(name: "weakKer", scope: !2, file: !2, line: 73, type: !5, scopeLine: 73, flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !4)

llvm/test/Analysis/KernelInfo/openmp/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ more exhaustively check KernelInfoPrinter features using reduced LLVM IR.
88
The LLVM IR in each test file `$TEST` can be regenerated as follows in the case
99
that Clang OpenMP codegen changes or it becomes desirable to adjust the source
1010
OpenMP program below. First, remove the existing LLVM IR from `$TEST`. Then,
11-
where `$TARGET` (e.g., `nvptx64-nvidia-cuda` or `amdgcn-amd-amdhsa`) depends on
12-
`$TEST`:
11+
where `$TARGET` (e.g., `nvptx64-nvidia-cuda-sm_70` or `amdgcn-amd-amdhsa-gfx906`)
12+
depends on `$TEST`:
1313

1414
```
1515
$ cd /tmp
@@ -34,7 +34,7 @@ void h(int i) {
3434
}
3535
}
3636
37-
$ clang -g -fopenmp -fopenmp-targets=$TARGET -save-temps -c test.c
37+
$ clang -g -fopenmp --offload-arch=native -save-temps -c test.c
3838
$ llvm-dis test-openmp-$TARGET.bc
3939
$ cat test-openmp-$TARGET.ll >> $TEST
4040
```

0 commit comments

Comments
 (0)