Skip to content

Commit 4c9b7ff

Browse files
authored
[LLVM] Introduce 'llvm-offload-wrapper' tool (llvm#153504)
Summary: This is a standalone tool that does the wrapper stage of the `clang-linker-wrapper`. We want this to be an external tool because currently there's no easy way to split apart what the clang-linker-wrapper is doing under the hood. With this tool, users can manually extract files with `clang-offload-packager`, feed them through `clang --target=<triple>` and then use this tool to generate a `.bc` file they can give to the linker. The goal here is to make reproducing the linker wrapper steps easier.
1 parent a0eb995 commit 4c9b7ff

File tree

5 files changed

+207
-0
lines changed

5 files changed

+207
-0
lines changed

clang/docs/ClangLinkerWrapper.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ only for the linker wrapper will be forwarded to the wrapped linker job.
6060
--v Display the version number and exit
6161
-- The separator for the wrapped linker arguments
6262
63+
The linker wrapper will generate the appropriate runtime calls to register the
64+
generated device binary with the offloading runtime. To do this step manually we
65+
provide the ``llvm-offload-wrapper`` utility.
66+
6367
Relocatable Linking
6468
===================
6569

llvm/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ set(LLVM_TEST_DEPENDS
118118
llvm-objdump
119119
llvm-opt-fuzzer
120120
llvm-opt-report
121+
llvm-offload-wrapper
121122
llvm-otool
122123
llvm-pdbutil
123124
llvm-profdata

llvm/test/Other/offload-wrapper.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; RUN: llvm-offload-wrapper --triple=x86-64 -kind=hip %s -o %t.bc
2+
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP
3+
4+
; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
5+
; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
6+
; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin"
7+
; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
8+
; HIP-NEXT: @.hip.binary_handle = internal global ptr null
9+
; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
10+
11+
; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
12+
; HIP-NEXT: entry:
13+
; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
14+
; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
15+
; HIP-NEXT: call void @.hip.globals_reg(ptr %0)
16+
; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
17+
; HIP-NEXT: ret void
18+
; HIP-NEXT: }
19+
20+
; HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
21+
; HIP-NEXT: entry:
22+
; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
23+
; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
24+
; HIP-NEXT: ret void
25+
; HIP-NEXT: }
26+
27+
; RUN: llvm-offload-wrapper --triple=x86-64 -kind=cuda %s -o %t.bc
28+
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA
29+
30+
; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
31+
; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
32+
; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin"
33+
; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
34+
; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
35+
; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]
36+
37+
; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
38+
; CUDA-NEXT: entry:
39+
; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
40+
; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8
41+
; CUDA-NEXT: call void @.cuda.globals_reg(ptr %0)
42+
; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0)
43+
; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
44+
; CUDA-NEXT: ret void
45+
; CUDA-NEXT: }
46+
47+
; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
48+
; CUDA-NEXT: entry:
49+
; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
50+
; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
51+
; CUDA-NEXT: ret void
52+
; CUDA-NEXT: }
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# LLVM component libraries requested for the llvm-offload-wrapper tool.
# NOTE(review): presumably consumed by the add_llvm_tool() call below, per the
# usual LLVM CMake convention -- confirm against the LLVM build helpers.
set(LLVM_LINK_COMPONENTS
2+
BitWriter
3+
Object
4+
Option
5+
FrontendOffloading
6+
Support
7+
TargetParser
8+
)
9+
10+
# Declare the llvm-offload-wrapper executable, built from a single source file.
# The DEPENDS clause names intrinsics_gen as a target this tool depends on.
add_llvm_tool(llvm-offload-wrapper
11+
llvm-offload-wrapper.cpp
12+
13+
DEPENDS
14+
intrinsics_gen
15+
)
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
//===- llvm-offload-wrapper: Create runtime registration code for devices -===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Provides a utility for generating runtime registration code for device code.
10+
// We take a binary image (CUDA fatbinary, HIP offload bundle, LLVM binary) and
11+
// create a new IR module that calls the respective runtime to load it on the
12+
// device.
13+
//
14+
//===----------------------------------------------------------------------===//
15+
16+
#include "llvm/Bitcode/BitcodeWriter.h"
17+
#include "llvm/Frontend/Offloading/OffloadWrapper.h"
18+
#include "llvm/Frontend/Offloading/Utility.h"
19+
#include "llvm/Object/OffloadBinary.h"
20+
#include "llvm/Support/CommandLine.h"
21+
#include "llvm/Support/FileOutputBuffer.h"
22+
#include "llvm/Support/FileSystem.h"
23+
#include "llvm/Support/InitLLVM.h"
24+
#include "llvm/Support/MemoryBuffer.h"
25+
#include "llvm/Support/Path.h"
26+
#include "llvm/Support/Signals.h"
27+
#include "llvm/Support/StringSaver.h"
28+
#include "llvm/Support/WithColor.h"
29+
#include "llvm/TargetParser/Host.h"
30+
31+
using namespace llvm;
32+
33+
// Hidden boolean flag `-h`; main() checks it and prints the help message.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
34+
35+
// Option category shared by every tool-specific option below; main() passes it
// to cl::HideUnrelatedOptions().
// NOTE(review): the identifier is spelled "Wrapepr" (sic). Correcting it means
// renaming every use in this file in one change.
static cl::OptionCategory
36+
OffloadWrapeprCategory("llvm-offload-wrapper options");
37+
38+
// Required `-kind` option selecting the offload kind to wrap for. Accepted
// spellings are "openmp", "cuda" and "hip".
static cl::opt<object::OffloadKind> Kind(
39+
"kind", cl::desc("Wrap for offload kind:"), cl::cat(OffloadWrapeprCategory),
40+
cl::Required,
41+
cl::values(clEnumValN(object::OFK_OpenMP, "openmp", "Wrap OpenMP binaries"),
42+
clEnumValN(object::OFK_Cuda, "cuda", "Wrap CUDA binaries"),
43+
clEnumValN(object::OFK_HIP, "hip", "Wrap HIP binaries")));
44+
45+
// `-o <file>`: path the generated module is written to. No cl::init and no
// cl::Required are given here, so the value is empty when `-o` is omitted.
static cl::opt<std::string> OutputFile("o", cl::desc("Write output to <file>."),
46+
cl::value_desc("file"),
47+
cl::cat(OffloadWrapeprCategory));
48+
49+
// Positional input files holding the device images to wrap; at least one must
// be supplied (cl::OneOrMore).
static cl::list<std::string> InputFiles(cl::Positional,
50+
cl::desc("Wrap input from <file>"),
51+
cl::value_desc("file"), cl::OneOrMore,
52+
cl::cat(OffloadWrapeprCategory));
53+
54+
// `--triple`: target triple string for the wrapper module. It is initialised
// from sys::getDefaultTargetTriple() when not given on the command line.
static cl::opt<std::string>
55+
TheTriple("triple", cl::desc("Target triple for the wrapper module"),
56+
cl::init(sys::getDefaultTargetTriple()),
57+
cl::cat(OffloadWrapeprCategory));
58+
59+
/// Generate runtime registration code around the device images in
/// \p BuffersToWrap for the offload kind selected with `-kind`, and write the
/// resulting module as bitcode to the file named by `-o`.
///
/// CUDA and HIP take a single image (a fatbinary or an offload bundle), so
/// more than one buffer is rejected for those kinds. OpenMP forwards every
/// buffer to the wrapper. The CUDA and HIP paths read
/// `BuffersToWrap.front()`, so the caller must pass at least one buffer.
///
/// \param BuffersToWrap Raw contents of the device images to embed.
/// \returns Error::success() on success. Otherwise an error for an
/// unsupported kind or image count, an error propagated from the wrapping
/// routine, or a file error naming the output path that could not be opened.
static Error wrapImages(ArrayRef<ArrayRef<char>> BuffersToWrap) {
  if (BuffersToWrap.size() > 1 &&
      (Kind == llvm::object::OFK_Cuda || Kind == llvm::object::OFK_HIP))
    return createStringError(
        "CUDA / HIP offloading uses a single fatbinary or offload bundle");

  LLVMContext Context;
  Module M("offload.wrapper.module", Context);
  // Use the triple requested with `--triple` (which defaults to the host
  // triple). Previously an empty Triple() was installed here, so the option
  // was parsed but had no effect on the generated module.
  M.setTargetTriple(Triple(TheTriple.getValue()));

  switch (Kind) {
  case llvm::object::OFK_OpenMP:
    if (Error Err = offloading::wrapOpenMPBinaries(
            M, BuffersToWrap, offloading::getOffloadEntryArray(M),
            /*Suffix=*/"", /*Relocatable=*/false))
      return Err;
    break;
  case llvm::object::OFK_Cuda:
    if (Error Err = offloading::wrapCudaBinary(
            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M),
            /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
      return Err;
    break;
  case llvm::object::OFK_HIP:
    if (Error Err = offloading::wrapHIPBinary(
            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M)))
      return Err;
    break;
  default:
    return createStringError(getOffloadKindName(Kind) +
                             " wrapping is not supported");
  }

  int FD = -1;
  // Attach the output path to the error so the user can see which file could
  // not be opened (this includes the case where `-o` was omitted).
  if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
    return createFileError(OutputFile, EC);
  // The stream takes ownership of FD and closes it on destruction.
  llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true);
  WriteBitcodeToFile(M, OS);

  return Error::success();
}
100+
101+
int main(int argc, char **argv) {
102+
InitLLVM X(argc, argv);
103+
cl::HideUnrelatedOptions(OffloadWrapeprCategory);
104+
cl::ParseCommandLineOptions(
105+
argc, argv,
106+
"Generate runtime registration code for a device binary image\n");
107+
108+
if (Help) {
109+
cl::PrintHelpMessage();
110+
return EXIT_SUCCESS;
111+
}
112+
113+
auto ReportError = [argv](Error E) {
114+
logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
115+
exit(EXIT_FAILURE);
116+
};
117+
118+
SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
119+
SmallVector<ArrayRef<char>> BuffersToWrap;
120+
for (StringRef Input : InputFiles) {
121+
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
122+
MemoryBuffer::getFileOrSTDIN(Input);
123+
if (std::error_code EC = BufferOrErr.getError())
124+
ReportError(createFileError(Input, EC));
125+
std::unique_ptr<MemoryBuffer> &Buffer =
126+
Buffers.emplace_back(std::move(*BufferOrErr));
127+
BuffersToWrap.emplace_back(
128+
ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
129+
}
130+
131+
if (Error Err = wrapImages(BuffersToWrap))
132+
ReportError(std::move(Err));
133+
134+
return EXIT_SUCCESS;
135+
}

0 commit comments

Comments
 (0)