Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 9 additions & 26 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,11 @@ struct ol_event_impl_t {

struct ol_program_impl_t {
ol_program_impl_t(plugin::DeviceImageTy *Image,
std::unique_ptr<llvm::MemoryBuffer> ImageData,
const __tgt_device_image &DeviceImage)
: Image(Image), ImageData(std::move(ImageData)),
DeviceImage(DeviceImage) {}
llvm::MemoryBufferRef DeviceImage)
: Image(Image), DeviceImage(DeviceImage) {}
plugin::DeviceImageTy *Image;
std::unique_ptr<llvm::MemoryBuffer> ImageData;
std::mutex SymbolListMutex;
__tgt_device_image DeviceImage;
llvm::MemoryBufferRef DeviceImage;
llvm::StringMap<std::unique_ptr<ol_symbol_impl_t>> KernelSymbols;
llvm::StringMap<std::unique_ptr<ol_symbol_impl_t>> GlobalSymbols;
};
Expand Down Expand Up @@ -891,28 +888,14 @@ Error olMemFill_impl(ol_queue_handle_t Queue, void *Ptr, size_t PatternSize,
Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData,
size_t ProgDataSize, ol_program_handle_t *Program) {
// Make a copy of the program binary in case it is released by the caller.
auto ImageData = MemoryBuffer::getMemBufferCopy(
StringRef(reinterpret_cast<const char *>(ProgData), ProgDataSize));

auto DeviceImage = __tgt_device_image{
const_cast<char *>(ImageData->getBuffer().data()),
const_cast<char *>(ImageData->getBuffer().data()) + ProgDataSize, nullptr,
nullptr};

ol_program_handle_t Prog =
new ol_program_impl_t(nullptr, std::move(ImageData), DeviceImage);

auto Res =
Device->Device->loadBinary(Device->Device->Plugin, &Prog->DeviceImage);
if (!Res) {
delete Prog;
StringRef Buffer(reinterpret_cast<const char *>(ProgData), ProgDataSize);
Expected<plugin::DeviceImageTy *> Res =
Device->Device->loadBinary(Device->Device->Plugin, Buffer);
if (!Res)
return Res.takeError();
}
assert(*Res != nullptr && "loadBinary returned nullptr");

Prog->Image = *Res;
*Program = Prog;
assert(*Res && "loadBinary returned nullptr");

*Program = new ol_program_impl_t(*Res, (*Res)->getMemoryBuffer());
return Error::success();
}

Expand Down
28 changes: 14 additions & 14 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,8 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
struct AMDGPUDeviceImageTy : public DeviceImageTy {
/// Create the AMDGPU image with the id and the target image pointer.
AMDGPUDeviceImageTy(int32_t ImageId, GenericDeviceTy &Device,
const __tgt_device_image *TgtImage)
: DeviceImageTy(ImageId, Device, TgtImage) {}
std::unique_ptr<MemoryBuffer> &&TgtImage)
: DeviceImageTy(ImageId, Device, std::move(TgtImage)) {}

/// Prepare and load the executable corresponding to the image.
Error loadExecutable(const AMDGPUDeviceTy &Device);
Expand Down Expand Up @@ -2160,7 +2160,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
AMDGPUDeviceImageTy &AMDImage = static_cast<AMDGPUDeviceImageTy &>(*Image);

// Unload the executable of the image.
return AMDImage.unloadExecutable();
if (auto Err = AMDImage.unloadExecutable())
return Err;

// Destroy the associated memory and invalidate the object.
Plugin.free(Image);
return Error::success();
}

/// Deinitialize the device and release its resources.
Expand All @@ -2183,18 +2188,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

virtual Error callGlobalConstructors(GenericPluginTy &Plugin,
DeviceImageTy &Image) override {
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (Handler.isSymbolInImage(*this, Image, "amdgcn.device.fini"))
Image.setPendingGlobalDtors();

return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/true);
}

virtual Error callGlobalDestructors(GenericPluginTy &Plugin,
DeviceImageTy &Image) override {
if (Image.hasPendingGlobalDtors())
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
return Plugin::success();
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
}

uint64_t getStreamBusyWaitMicroseconds() const { return OMPX_StreamBusyWait; }
Expand Down Expand Up @@ -2321,11 +2320,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}

/// Load the binary image into the device and allocate an image object.
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
int32_t ImageId) override {
Expected<DeviceImageTy *>
loadBinaryImpl(std::unique_ptr<MemoryBuffer> &&TgtImage,
int32_t ImageId) override {
// Allocate and initialize the image object.
AMDGPUDeviceImageTy *AMDImage = Plugin.allocate<AMDGPUDeviceImageTy>();
new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage);
new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, std::move(TgtImage));

// Load the HSA executable.
if (Error Err = AMDImage->loadExecutable(*this))
Expand Down Expand Up @@ -3105,7 +3105,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// Perform a quick check for the named kernel in the image. The kernel
// should be created by the 'amdgpu-lower-ctor-dtor' pass.
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (IsCtor && !Handler.isSymbolInImage(*this, Image, KernelName))
if (!Handler.isSymbolInImage(*this, Image, KernelName))
return Plugin::success();

// Allocate and construct the AMDGPU kernel.
Expand Down
23 changes: 5 additions & 18 deletions offload/plugins-nextgen/common/include/JIT.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,27 +51,22 @@ struct JITEngine {
/// Run jit compilation on \p Image, which must be an LLVM bitcode image. It
/// is expected to return a memory buffer containing the generated device
/// image that could be loaded to the device directly.
Expected<const __tgt_device_image *>
process(const __tgt_device_image &Image,
target::plugin::GenericDeviceTy &Device);

/// Remove \p Image from the jit engine's cache
void erase(const __tgt_device_image &Image,
target::plugin::GenericDeviceTy &Device);
Expected<std::unique_ptr<MemoryBuffer>>
process(StringRef Image, target::plugin::GenericDeviceTy &Device);

private:
/// Compile the bitcode image \p Image and generate the binary image that can
/// be loaded to the target device of the triple \p Triple architecture \p
/// MCpu. \p PostProcessing will be called after codegen to handle cases such
/// as assembler as an external tool.
Expected<const __tgt_device_image *>
compile(const __tgt_device_image &Image, const std::string &ComputeUnitKind,
Expected<std::unique_ptr<MemoryBuffer>>
compile(StringRef Image, const std::string &ComputeUnitKind,
PostProcessingFn PostProcessing);

/// Create or retrieve the object image file from the file system or via
/// compilation of the \p Image.
Expected<std::unique_ptr<MemoryBuffer>>
getOrCreateObjFile(const __tgt_device_image &Image, LLVMContext &Ctx,
getOrCreateObjFile(StringRef Image, LLVMContext &Ctx,
const std::string &ComputeUnitKind);

/// Run backend, which contains optimization and code generation.
Expand All @@ -92,14 +87,6 @@ struct JITEngine {
struct ComputeUnitInfo {
/// LLVM Context in which the modules will be constructed.
LLVMContext Context;

/// A map of embedded IR images to the buffer used to store JITed code
DenseMap<const __tgt_device_image *, std::unique_ptr<MemoryBuffer>>
JITImages;

/// A map of embedded IR images to JITed images.
DenseMap<const __tgt_device_image *, std::unique_ptr<__tgt_device_image>>
TgtImageMap;
};

/// Map from (march) "CPUs" (e.g., sm_80, or gfx90a), which we call compute
Expand Down
44 changes: 10 additions & 34 deletions offload/plugins-nextgen/common/include/PluginInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,60 +306,36 @@ class DeviceImageTy {
/// not unique between different device; they may overlap.
int32_t ImageId;

/// The pointer to the raw __tgt_device_image.
const __tgt_device_image *TgtImage;
const __tgt_device_image *TgtImageBitcode;
/// The managed image data.
std::unique_ptr<MemoryBuffer> Image;

/// Reference to the device this image is loaded on.
GenericDeviceTy &Device;

/// If this image has any global destructors that must be called.
/// FIXME: This is only required because we currently have no invariants
/// towards the lifetime of the underlying image. We should either copy
/// the image into memory locally or erase the pointers after init.
bool PendingGlobalDtors;

public:
virtual ~DeviceImageTy() = default;

DeviceImageTy(int32_t Id, GenericDeviceTy &Device,
const __tgt_device_image *Image)
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr), Device(Device),
PendingGlobalDtors(false) {
assert(TgtImage && "Invalid target image");
}
std::unique_ptr<MemoryBuffer> &&Image)
: ImageId(Id), Image(std::move(Image)), Device(Device) {}

/// Get the image identifier within the device.
int32_t getId() const { return ImageId; }

/// Get the device that this image is loaded onto.
GenericDeviceTy &getDevice() const { return Device; }

/// Get the pointer to the raw __tgt_device_image.
const __tgt_device_image *getTgtImage() const { return TgtImage; }

void setTgtImageBitcode(const __tgt_device_image *TgtImageBitcode) {
this->TgtImageBitcode = TgtImageBitcode;
}

const __tgt_device_image *getTgtImageBitcode() const {
return TgtImageBitcode;
}

/// Get the image starting address.
void *getStart() const { return TgtImage->ImageStart; }
const void *getStart() const { return Image->getBufferStart(); }

/// Get the image size.
size_t getSize() const {
return utils::getPtrDiff(TgtImage->ImageEnd, TgtImage->ImageStart);
}
size_t getSize() const { return Image->getBufferSize(); }

/// Get a non-owning memory buffer reference to the whole image. The
/// reference spans getSize() bytes starting at getStart() and carries the
/// fixed identifier "Image".
MemoryBufferRef getMemoryBuffer() const {
  // Use a named cast rather than a C-style cast so the conversion from the
  // untyped start pointer to character data is explicit and greppable.
  return MemoryBufferRef(
      StringRef(static_cast<const char *>(getStart()), getSize()), "Image");
}
/// Accessors to the boolean value
bool setPendingGlobalDtors() { return PendingGlobalDtors = true; }
bool hasPendingGlobalDtors() const { return PendingGlobalDtors; }
};

/// Class implementing common functionalities of offload kernels. Each plugin
Expand Down Expand Up @@ -831,9 +807,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {

/// Load the binary image into the device and return the loaded device image.
Expected<DeviceImageTy *> loadBinary(GenericPluginTy &Plugin,
const __tgt_device_image *TgtImage);
StringRef TgtImage);
virtual Expected<DeviceImageTy *>
loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
loadBinaryImpl(std::unique_ptr<MemoryBuffer> &&TgtImage, int32_t ImageId) = 0;

/// Unload a previously loaded Image from the device
Error unloadBinary(DeviceImageTy *Image);
Expand Down
67 changes: 14 additions & 53 deletions offload/plugins-nextgen/common/src/JIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,6 @@ using namespace omp::target;

namespace {

bool isImageBitcode(const __tgt_device_image &Image) {
StringRef Binary(reinterpret_cast<const char *>(Image.ImageStart),
utils::getPtrDiff(Image.ImageEnd, Image.ImageStart));

return identify_magic(Binary) == file_magic::bitcode;
}

Expected<std::unique_ptr<Module>>
createModuleFromMemoryBuffer(std::unique_ptr<MemoryBuffer> &MB,
LLVMContext &Context) {
Expand All @@ -66,12 +59,10 @@ createModuleFromMemoryBuffer(std::unique_ptr<MemoryBuffer> &MB,
"failed to create module");
return std::move(Mod);
}
Expected<std::unique_ptr<Module>>
createModuleFromImage(const __tgt_device_image &Image, LLVMContext &Context) {
StringRef Data((const char *)Image.ImageStart,
utils::getPtrDiff(Image.ImageEnd, Image.ImageStart));
Expected<std::unique_ptr<Module>> createModuleFromImage(StringRef Image,
LLVMContext &Context) {
std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
Data, /*BufferName=*/"", /*RequiresNullTerminator=*/false);
Image, /*BufferName=*/"", /*RequiresNullTerminator=*/false);
return createModuleFromMemoryBuffer(MB, Context);
}

Expand Down Expand Up @@ -238,7 +229,7 @@ JITEngine::backend(Module &M, const std::string &ComputeUnitKind,
}

Expected<std::unique_ptr<MemoryBuffer>>
JITEngine::getOrCreateObjFile(const __tgt_device_image &Image, LLVMContext &Ctx,
JITEngine::getOrCreateObjFile(StringRef Image, LLVMContext &Ctx,
const std::string &ComputeUnitKind) {

// Check if the user replaces the module at runtime with a finished object.
Expand Down Expand Up @@ -277,58 +268,28 @@ JITEngine::getOrCreateObjFile(const __tgt_device_image &Image, LLVMContext &Ctx,
return backend(*Mod, ComputeUnitKind, JITOptLevel);
}

Expected<const __tgt_device_image *>
JITEngine::compile(const __tgt_device_image &Image,
const std::string &ComputeUnitKind,
Expected<std::unique_ptr<MemoryBuffer>>
JITEngine::compile(StringRef Image, const std::string &ComputeUnitKind,
PostProcessingFn PostProcessing) {
std::lock_guard<std::mutex> Lock(ComputeUnitMapMutex);

// Check if we JITed this image for the given compute unit kind before.
ComputeUnitInfo &CUI = ComputeUnitMap[ComputeUnitKind];
if (CUI.TgtImageMap.contains(&Image))
return CUI.TgtImageMap[&Image].get();

auto ObjMBOrErr = getOrCreateObjFile(Image, CUI.Context, ComputeUnitKind);
LLVMContext Ctz;
auto ObjMBOrErr = getOrCreateObjFile(Image, Ctz, ComputeUnitKind);
if (!ObjMBOrErr)
return ObjMBOrErr.takeError();

auto ImageMBOrErr = PostProcessing(std::move(*ObjMBOrErr));
if (!ImageMBOrErr)
return ImageMBOrErr.takeError();

CUI.JITImages.insert({&Image, std::move(*ImageMBOrErr)});
auto &ImageMB = CUI.JITImages[&Image];
CUI.TgtImageMap.insert({&Image, std::make_unique<__tgt_device_image>()});
auto &JITedImage = CUI.TgtImageMap[&Image];
*JITedImage = Image;
JITedImage->ImageStart = const_cast<char *>(ImageMB->getBufferStart());
JITedImage->ImageEnd = const_cast<char *>(ImageMB->getBufferEnd());

return JITedImage.get();
return PostProcessing(std::move(*ObjMBOrErr));
}

Expected<const __tgt_device_image *>
JITEngine::process(const __tgt_device_image &Image,
target::plugin::GenericDeviceTy &Device) {
const std::string &ComputeUnitKind = Device.getComputeUnitKind();
Expected<std::unique_ptr<MemoryBuffer>>
JITEngine::process(StringRef Image, target::plugin::GenericDeviceTy &Device) {
assert(identify_magic(Image) == file_magic::bitcode && "Image not LLVM-IR");

const std::string &ComputeUnitKind = Device.getComputeUnitKind();
PostProcessingFn PostProcessing = [&Device](std::unique_ptr<MemoryBuffer> MB)
-> Expected<std::unique_ptr<MemoryBuffer>> {
return Device.doJITPostProcessing(std::move(MB));
};

if (isImageBitcode(Image))
return compile(Image, ComputeUnitKind, PostProcessing);

return &Image;
}

void JITEngine::erase(const __tgt_device_image &Image,
target::plugin::GenericDeviceTy &Device) {
std::lock_guard<std::mutex> Lock(ComputeUnitMapMutex);
const std::string &ComputeUnitKind = Device.getComputeUnitKind();
ComputeUnitInfo &CUI = ComputeUnitMap[ComputeUnitKind];

CUI.TgtImageMap.erase(&Image);
CUI.JITImages.erase(&Image);
return compile(Image, ComputeUnitKind, PostProcessing);
}
Loading
Loading