Skip to content

Commit 71f0d22

Browse files
Pierre-vhIanWood1
authored andcommitted
[AMDGPU][SplitModule] Do not create empty modules (llvm#135761)
Skip creating a module if no function is going to be imported. Also includes a change so that if the first partition is empty (which can happen), we import global with non-local linkage into the first non-empty partition, instead of P0 all the time. I thought we'd need to change users of the SplitModule callback so they can deal with less modules than the number requested, but no. We already return only 1 module in some cases and it seems to be handled just fine. Fixes SWDEV-523146
1 parent f8eb6af commit 71f0d22

File tree

6 files changed

+63
-34
lines changed

6 files changed

+63
-34
lines changed

llvm/include/llvm/Target/TargetMachine.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,9 @@ class TargetMachine {
451451
/// Entry point for module splitting. Targets can implement custom module
452452
/// splitting logic, mainly used by LTO for --lto-partitions.
453453
///
454+
/// On success, this guarantees that between 1 and \p NumParts modules were
455+
/// created and passed to \p ModuleCallBack.
456+
///
454457
/// \returns `true` if the module was split, `false` otherwise. When `false`
455458
/// is returned, it is assumed that \p ModuleCallback has never been called
456459
/// and \p M has not been modified.

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,10 @@ static void splitAMDGPUModule(
14791479
<< "' - Partition summaries will not be printed\n";
14801480
}
14811481

1482+
// One module will import all GlobalValues that are not Functions
1483+
// and are not subject to conservative import.
1484+
bool ImportAllGVs = true;
1485+
14821486
for (unsigned PID = 0; PID < NumParts; ++PID) {
14831487
SplitModuleTimer SMT2("modules_creation",
14841488
"creating modules for each partition");
@@ -1488,6 +1492,13 @@ static void splitAMDGPUModule(
14881492
for (unsigned NodeID : (*Proposal)[PID].set_bits())
14891493
FnsInPart.insert(&SG.getNode(NodeID).getFunction());
14901494

1495+
// Don't create empty modules.
1496+
if (FnsInPart.empty()) {
1497+
LLVM_DEBUG(dbgs() << "[split] P" << PID
1498+
<< " is empty, not creating module\n");
1499+
continue;
1500+
}
1501+
14911502
ValueToValueMapTy VMap;
14921503
CostType PartCost = 0;
14931504
std::unique_ptr<Module> MPart(
@@ -1501,10 +1512,12 @@ static void splitAMDGPUModule(
15011512
return false;
15021513
}
15031514

1504-
// Everything else goes in the first partition.
1505-
return needsConservativeImport(GV) || PID == 0;
1515+
// Everything else goes in the first non-empty module we create.
1516+
return ImportAllGVs || needsConservativeImport(GV);
15061517
}));
15071518

1519+
ImportAllGVs = false;
1520+
15081521
// FIXME: Aliases aren't seen often, and their handling isn't perfect so
15091522
// bugs are possible.
15101523

llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0
22
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
33
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4-
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
54

65
; 3 kernels:
76
; - A does a direct call to HelperA
@@ -11,14 +10,11 @@
1110
; The helper functions will get externalized, so C/A will end up
1211
; in the same partition.
1312

14-
; P0 is empty.
15-
; CHECK0: declare
13+
; CHECK0: define amdgpu_kernel void @B(ptr %dst)
1614

17-
; CHECK1: define amdgpu_kernel void @B(ptr %dst)
18-
19-
; CHECK2: define hidden void @HelperA()
20-
; CHECK2: define amdgpu_kernel void @A()
21-
; CHECK2: define amdgpu_kernel void @C()
15+
; CHECK1: define hidden void @HelperA()
16+
; CHECK1: define amdgpu_kernel void @A()
17+
; CHECK1: define amdgpu_kernel void @C()
2218

2319
define internal void @HelperA() {
2420
ret void

llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
22
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
33
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4-
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
54

65
; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
76
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
@@ -15,19 +14,18 @@
1514
; Also check w/o large kernels processing to verify they are indeed handled
1615
; differently.
1716

18-
; P0 is empty
19-
; CHECK0: declare
17+
; Only two partitions created for the first command.
2018

21-
; CHECK1: define internal void @HelperC()
22-
; CHECK1: define weak_odr amdgpu_kernel void @C
19+
; CHECK0: define internal void @HelperC()
20+
; CHECK0: define weak_odr amdgpu_kernel void @C
2321

24-
; CHECK2: define internal void @large2()
25-
; CHECK2: define internal void @large1()
26-
; CHECK2: define internal void @large0()
27-
; CHECK2: define internal void @HelperA()
28-
; CHECK2: define internal void @HelperB()
29-
; CHECK2: define amdgpu_kernel void @A
30-
; CHECK2: define weak_odr amdgpu_kernel void @B
22+
; CHECK1: define internal void @large2()
23+
; CHECK1: define internal void @large1()
24+
; CHECK1: define internal void @large0()
25+
; CHECK1: define internal void @HelperA()
26+
; CHECK1: define internal void @HelperB()
27+
; CHECK1: define amdgpu_kernel void @A
28+
; CHECK1: define weak_odr amdgpu_kernel void @B
3129

3230
; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
3331
; NOLARGEKERNELS-CHECK0: define weak_odr amdgpu_kernel void @C

llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
22
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
33
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4-
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
54

65
; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
76
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
@@ -15,19 +14,18 @@
1514
; Also check w/o large kernels processing to verify they are indeed handled
1615
; differently.
1716

18-
; P0 is empty
19-
; CHECK0: declare
17+
; Only 2 partitions for the first command.
2018

21-
; CHECK1: define internal void @HelperC()
22-
; CHECK1: define amdgpu_kernel void @C
19+
; CHECK0: define internal void @HelperC()
20+
; CHECK0: define amdgpu_kernel void @C
2321

24-
; CHECK2: define internal void @large2()
25-
; CHECK2: define internal void @large1()
26-
; CHECK2: define internal void @large0()
27-
; CHECK2: define internal void @HelperA()
28-
; CHECK2: define internal void @HelperB()
29-
; CHECK2: define amdgpu_kernel void @A
30-
; CHECK2: define amdgpu_kernel void @B
22+
; CHECK1: define internal void @large2()
23+
; CHECK1: define internal void @large1()
24+
; CHECK1: define internal void @large0()
25+
; CHECK1: define internal void @HelperA()
26+
; CHECK1: define internal void @HelperB()
27+
; CHECK1: define amdgpu_kernel void @A
28+
; CHECK1: define amdgpu_kernel void @B
3129

3230
; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
3331
; NOLARGEKERNELS-CHECK0: define amdgpu_kernel void @C
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4+
5+
; Only 2 out of 3 partitions are created, check the external global is preserved in the first partition.
6+
7+
; CHECK0: @foobar = linkonce_odr global i64 52
8+
; CHECK0: define amdgpu_kernel void @B
9+
10+
; CHECK1-NOT: @foobar = linkonce_odr global i64 52
11+
; CHECK1: define amdgpu_kernel void @A
12+
13+
@foobar = linkonce_odr global i64 52
14+
15+
define amdgpu_kernel void @A() {
16+
ret void
17+
}
18+
19+
define amdgpu_kernel void @B() {
20+
ret void
21+
}

0 commit comments

Comments
 (0)