Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/docs/ClangLinkerWrapper.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ only for the linker wrapper will be forwarded to the wrapped linker job.
--v Display the version number and exit
-- The separator for the wrapped linker arguments

The linker wrapper will generate the appropriate runtime calls to register the
generated device binary with the offloading runtime. To do this step manually,
we provide the ``llvm-offload-wrapper`` utility.

Relocatable Linking
===================

Expand Down
1 change: 1 addition & 0 deletions llvm/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ set(LLVM_TEST_DEPENDS
llvm-objdump
llvm-opt-fuzzer
llvm-opt-report
llvm-offload-wrapper
llvm-otool
llvm-pdbutil
llvm-profdata
Expand Down
52 changes: 52 additions & 0 deletions llvm/test/Other/offload-wrapper.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
; RUN: llvm-offload-wrapper --triple=x86-64 -kind=hip %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP

; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin"
; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
; HIP-NEXT: @.hip.binary_handle = internal global ptr null
; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]

; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
; HIP-NEXT: entry:
; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
; HIP-NEXT: call void @.hip.globals_reg(ptr %0)
; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
; HIP-NEXT: ret void
; HIP-NEXT: }

; HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
; HIP-NEXT: entry:
; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
; HIP-NEXT: ret void
; HIP-NEXT: }

; RUN: llvm-offload-wrapper --triple=x86-64 -kind=cuda %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA

; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin"
; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]

; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
; CUDA-NEXT: entry:
; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8
; CUDA-NEXT: call void @.cuda.globals_reg(ptr %0)
; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0)
; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
; CUDA-NEXT: ret void
; CUDA-NEXT: }

; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
; CUDA-NEXT: entry:
; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
; CUDA-NEXT: ret void
; CUDA-NEXT: }
15 changes: 15 additions & 0 deletions llvm/tools/llvm-offload-wrapper/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
set(LLVM_LINK_COMPONENTS
BitWriter
Object
Option
FrontendOffloading
Support
TargetParser
)

add_llvm_tool(llvm-offload-wrapper
llvm-offload-wrapper.cpp

DEPENDS
intrinsics_gen
)
135 changes: 135 additions & 0 deletions llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
//===- llvm-offload-wrapper: Create runtime registration code for devices -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides a utility for generating runtime registration code for device code.
// We take a binary image (CUDA fatbinary, HIP offload bundle, LLVM binary) and
// create a new IR module that calls the respective runtime to load it on the
// device.
//
//===----------------------------------------------------------------------===//

#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Frontend/Offloading/OffloadWrapper.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WithColor.h"
#include "llvm/TargetParser/Host.h"

using namespace llvm;

// Hidden "-h" flag, described as an alias for -help; main() checks it after
// option parsing.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);

// Category that groups every option specific to this tool. It is declared
// before the options below because each of them refers to it via cl::cat.
static cl::OptionCategory
OffloadWrapeprCategory("llvm-offload-wrapper options");

// Required "-kind" option selecting which offloading runtime to wrap for.
// Accepted spellings are "openmp", "cuda" and "hip".
static cl::opt<object::OffloadKind> Kind(
"kind", cl::desc("Wrap for offload kind:"), cl::cat(OffloadWrapeprCategory),
cl::Required,
cl::values(clEnumValN(object::OFK_OpenMP, "openmp", "Wrap OpenMP binaries"),
clEnumValN(object::OFK_Cuda, "cuda", "Wrap CUDA binaries"),
clEnumValN(object::OFK_HIP, "hip", "Wrap HIP binaries")));

// "-o <file>": path the wrapper output is written to.
static cl::opt<std::string> OutputFile("o", cl::desc("Write output to <file>."),
cl::value_desc("file"),
cl::cat(OffloadWrapeprCategory));

// Positional arguments: one or more input files containing the images to wrap.
static cl::list<std::string> InputFiles(cl::Positional,
cl::desc("Wrap input from <file>"),
cl::value_desc("file"), cl::OneOrMore,
cl::cat(OffloadWrapeprCategory));

// "-triple": target triple for the wrapper module. Its initial value is
// whatever sys::getDefaultTargetTriple() returns.
static cl::opt<std::string>
TheTriple("triple", cl::desc("Target triple for the wrapper module"),
cl::init(sys::getDefaultTargetTriple()),
cl::cat(OffloadWrapeprCategory));

/// Generate runtime registration code for the given device image(s) and write
/// the resulting wrapper module as bitcode to the file named by -o.
///
/// The offload kind is taken from the -kind option and the module's target
/// triple from the -triple option.
///
/// \param BuffersToWrap raw contents of the device images to embed. CUDA and
///        HIP accept exactly one image (a fatbinary or offload bundle); OpenMP
///        accepts any number of images.
/// \returns Error::success() once the bitcode has been written, otherwise an
///          error describing why wrapping or writing the output failed.
static Error wrapImages(ArrayRef<ArrayRef<char>> BuffersToWrap) {
  // Guard the .front() calls below; the command line already requires at
  // least one input, but this function should not rely on its caller for that.
  if (BuffersToWrap.empty())
    return createStringError("no input images were provided to wrap");

  if (BuffersToWrap.size() > 1 &&
      (Kind == llvm::object::OFK_Cuda || Kind == llvm::object::OFK_HIP))
    return createStringError(
        "CUDA / HIP offloading uses a single fatbinary or offload bundle");

  LLVMContext Context;
  Module M("offload.wrapper.module", Context);
  // Honor the -triple option. Previously an empty Triple() was used here, so
  // the option was parsed but silently ignored.
  // NOTE(review): the triple is passed on to getOffloadEntryArray(M) through
  // the module, so tests written against the empty triple may need their
  // expected section names updated - confirm.
  M.setTargetTriple(Triple(TheTriple.getValue()));

  switch (Kind) {
  case llvm::object::OFK_OpenMP:
    if (Error Err = offloading::wrapOpenMPBinaries(
            M, BuffersToWrap, offloading::getOffloadEntryArray(M),
            /*Suffix=*/"", /*Relocatable=*/false))
      return Err;
    break;
  case llvm::object::OFK_Cuda:
    if (Error Err = offloading::wrapCudaBinary(
            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M),
            /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
      return Err;
    break;
  case llvm::object::OFK_HIP:
    if (Error Err = offloading::wrapHIPBinary(
            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M)))
      return Err;
    break;
  default:
    return createStringError(getOffloadKindName(Kind) +
                             " wrapping is not supported");
  }

  int FD = -1;
  // Attach the output path to the error so the user can see which file could
  // not be opened, matching how input-file errors are reported in main().
  if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
    return createFileError(OutputFile, EC);

  llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true);
  WriteBitcodeToFile(M, OS);

  // Surface write failures (e.g. a full disk) as a regular Error. An error
  // left pending on the stream would otherwise be reported as a fatal error
  // when the stream is destroyed.
  OS.flush();
  if (OS.has_error()) {
    std::error_code EC = OS.error();
    OS.clear_error();
    return createFileError(OutputFile, EC);
  }

  return Error::success();
}

int main(int argc, char **argv) {
InitLLVM X(argc, argv);
cl::HideUnrelatedOptions(OffloadWrapeprCategory);
cl::ParseCommandLineOptions(
argc, argv,
"Generate runtime registration code for a device binary image\n");

if (Help) {
cl::PrintHelpMessage();
return EXIT_SUCCESS;
}

auto ReportError = [argv](Error E) {
logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
exit(EXIT_FAILURE);
};

SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
SmallVector<ArrayRef<char>> BuffersToWrap;
for (StringRef Input : InputFiles) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFileOrSTDIN(Input);
if (std::error_code EC = BufferOrErr.getError())
ReportError(createFileError(Input, EC));
std::unique_ptr<MemoryBuffer> &Buffer =
Buffers.emplace_back(std::move(*BufferOrErr));
BuffersToWrap.emplace_back(
ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
}

if (Error Err = wrapImages(BuffersToWrap))
ReportError(std::move(Err));

return EXIT_SUCCESS;
}
Loading