Skip to content

System image compression with zstd #59227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 37 additions & 9 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@
#include <llvm/Support/FormatAdapters.h>
#include <llvm/Linker/Linker.h>


using namespace llvm;

#include <zstd.h>

#include "jitlayers.h"
#include "serialize.h"
#include "julia_assert.h"
Expand Down Expand Up @@ -2147,27 +2148,54 @@ void jl_dump_native_impl(void *native_code,
sysimgM.setDataLayout(DL);
sysimgM.setStackProtectorGuard(StackProtectorGuard);
sysimgM.setOverrideStackAlignment(OverrideStackAlignment);
Constant *data = ConstantDataArray::get(Context,
ArrayRef<uint8_t>((const unsigned char*)z->buf, z->size));

char *compression_str = getenv("JULIA_IMAGE_COMPRESSION");
unsigned long compression = compression_str ? strtoul(compression_str, nullptr, 10) : 0;
ArrayRef<char> sysimg_data{z->buf, (size_t)z->size};
SmallVector<char, 0> compressed_data;
if (compression) {
compressed_data.resize(ZSTD_compressBound(z->size));
size_t comp_size = ZSTD_compress(compressed_data.data(), compressed_data.size(),
z->buf, z->size, compression);
compressed_data.resize(comp_size);
sysimg_data = compressed_data;
ios_close(z);
free(z);
}

Constant *data = ConstantDataArray::get(Context, sysimg_data);
auto sysdata = new GlobalVariable(sysimgM, data->getType(), false,
GlobalVariable::ExternalLinkage,
data, "jl_system_image_data");
sysdata->setAlignment(Align(64));
sysdata->setAlignment(Align(jl_getpagesize()));
#if JL_LLVM_VERSION >= 180000
sysdata->setCodeModel(CodeModel::Large);
#else
if (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())
sysdata->setSection(".ldata");
#endif
addComdat(sysdata, TheTriple);
Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size);
Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), sysimg_data.size());
addComdat(new GlobalVariable(sysimgM, len->getType(), true,
GlobalVariable::ExternalLinkage,
len, "jl_system_image_size"), TheTriple);
// Free z here, since we've copied out everything into data
// Results in serious memory savings
ios_close(z);
free(z);

const char *unpack_func = compression ? "jl_image_unpack_zstd" : "jl_image_unpack_uncomp";
auto unpack = new GlobalVariable(sysimgM, DL.getIntPtrType(Context), true,
GlobalVariable::ExternalLinkage, nullptr,
unpack_func);
addComdat(new GlobalVariable(sysimgM, PointerType::getUnqual(Context), true,
GlobalVariable::ExternalLinkage, unpack,
"jl_image_unpack"),
TheTriple);

if (!compression) {
// Free z here, since we've copied out everything into data
// Results in serious memory savings
ios_close(z);
free(z);
}
compressed_data.clear();
// Note that we don't set z to null, this allows the check in WRITE_ARCHIVE
// to function as expected
// no need to free the module/context, destructor handles that
Expand Down
79 changes: 57 additions & 22 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ External links:
#include <stdio.h> // printf
#include <inttypes.h> // PRIxPTR

#include <zstd.h>

#include "julia.h"
#include "julia_internal.h"
#include "julia_gcext.h"
Expand All @@ -81,6 +83,7 @@ External links:

#ifndef _OS_WINDOWS_
#include <dlfcn.h>
#include <sys/mman.h>
#endif

#include "valgrind.h"
Expand Down Expand Up @@ -3638,14 +3641,43 @@ JL_DLLEXPORT jl_image_buf_t jl_preload_sysimg(const char *fname)
}
}

// From a shared library handle, verify consistency and return a jl_image_buf_t
static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
// Signature shared by the image unpack routines below: given the shared
// library `handle`, fill in the fields of `*image`. The image exports a
// pointer to one of these routines under the symbol `jl_image_unpack`.
typedef void jl_image_unpack_func_t(void *handle, jl_image_buf_t *image);

// Unpack an image whose data is stored uncompressed in the shared library.
//
// Resolves `jl_system_image_size`, `jl_system_image_data` and
// `jl_image_pointers` in `handle` and points `image->data` / `image->pointers`
// directly at the library's symbols; `image->size` is read from the size
// symbol. No data is copied.
JL_DLLEXPORT void jl_image_unpack_uncomp(void *handle, jl_image_buf_t *image)
{
    size_t *size_sym;

    // Lookup order is kept as-is so a missing symbol is reported in the same order.
    jl_dlsym(handle, "jl_system_image_size", (void **)&size_sym, 1);
    jl_dlsym(handle, "jl_system_image_data", (void **)&image->data, 1);
    jl_dlsym(handle, "jl_image_pointers", (void**)&image->pointers, 1);

    image->size = *size_sym;
}

// Unpack an image whose data is stored as a zstd frame in the shared library.
//
// Resolves `jl_system_image_size` / `jl_system_image_data` (length and start
// of the compressed frame) and `jl_image_pointers` in `handle`, decompresses
// the frame into a freshly malloc'd buffer, and stores that buffer and its
// size in `image->data` / `image->size`. Once decompression has succeeded,
// the whole pages covered by the compressed data are released.
//
// Failure handling:
//   - a frame header that is invalid or does not record the decompressed
//     size, or a decompression failure, is reported through jl_error;
//   - allocation failure and munmap failure print a message and exit.
JL_DLLEXPORT void jl_image_unpack_zstd(void *handle, jl_image_buf_t *image)
{
    size_t *plen;
    const char *data;
    jl_dlsym(handle, "jl_system_image_size", (void **)&plen, 1);
    jl_dlsym(handle, "jl_system_image_data", (void **)&data, 1);
    jl_dlsym(handle, "jl_image_pointers", (void**)&image->pointers, 1);

    // ZSTD_getFrameContentSize reports failure through two sentinel values;
    // using either of them as an allocation size would be wrong.
    unsigned long long content_size = ZSTD_getFrameContentSize(data, *plen);
    if (content_size == ZSTD_CONTENTSIZE_ERROR || content_size == ZSTD_CONTENTSIZE_UNKNOWN)
        jl_error("Image file is corrupt: cannot determine decompressed image size");
    // Guard against truncation where size_t is narrower than 64 bits.
    if ((unsigned long long)(size_t)content_size != content_size)
        jl_error("Image file is corrupt: decompressed image size is too large");

    image->size = (size_t)content_size;
    // TODO(review): we probably want to mmap this with huge pages/large pages
    image->data = (char *)malloc(image->size);
    if (image->data == NULL && image->size != 0) {
        perror("malloc");
        jl_exit(1);
    }

    size_t written = ZSTD_decompress((void *)image->data, image->size, data, *plen);
    if (ZSTD_isError(written) || written != image->size) {
        // Do not hand a partially filled buffer back to the caller.
        free((void *)image->data);
        image->data = NULL;
        image->size = 0;
        jl_error("Image file is corrupt: zstd decompression failed");
    }

    // The compressed copy is no longer needed; release the whole pages it
    // covers (length rounded down to a multiple of the page size).
    size_t len = (*plen) & ~(jl_getpagesize() - 1);
#ifdef _OS_WINDOWS_
    // NOTE(review): the Win32 documentation says dwSize must be 0 with
    // MEM_RELEASE, so this call presumably fails for a non-zero len and the
    // result is ignored -- confirm the intended behavior on Windows.
    if (len)
        VirtualFree((void *)data, len, MEM_RELEASE);
#else
    if (len && munmap((void *)data, len)) {
        perror("munmap");
        jl_exit(1);
    }
#endif
}

// From a shared library handle, verify consistency and return a jl_image_buf_t
static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
{
// verify that the linker resolved the symbols in this image against ourselves (libjulia-internal)
void** (*get_jl_RTLD_DEFAULT_handle_addr)(void) = NULL;
if (handle != jl_RTLD_DEFAULT_handle) {
Expand All @@ -3654,38 +3686,41 @@ static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
jl_error("Image file failed consistency check: maybe opened the wrong version?");
}

jl_image_unpack_func_t **unpack;
jl_image_buf_t image = {
.kind = JL_IMAGE_KIND_SO,
.pointers = NULL,
.data = NULL,
.size = 0,
.base = 0,
};

// verification passed, lookup the buffer pointers
if (jl_system_image_size == 0 || is_pkgimage) {
// in the usual case, the sysimage was not statically linked to libjulia-internal
// look up the external sysimage symbols via the dynamic linker
jl_dlsym(handle, "jl_system_image_size", (void **)&plen, 1);
jl_dlsym(handle, "jl_system_image_data", (void **)&data, 1);
jl_dlsym(handle, "jl_image_pointers", (void**)&pointers, 1);
} else {
jl_dlsym(handle, "jl_image_unpack", (void **)&unpack, 1);
(*unpack)(handle, &image);
}
else {
// the sysimage was statically linked directly against libjulia-internal
// use the internal symbols
plen = &jl_system_image_size;
pointers = &jl_image_pointers;
data = &jl_system_image_data;
image.size = jl_system_image_size;
image.pointers = &jl_image_pointers;
image.data = &jl_system_image_data;
}

#ifdef _OS_WINDOWS_
base = (intptr_t)handle;
image.base = (intptr_t)handle;
#else
Dl_info dlinfo;
if (dladdr((void*)pointers, &dlinfo) != 0)
base = (intptr_t)dlinfo.dli_fbase;
if (dladdr((void*)image.pointers, &dlinfo) != 0)
image.base = (intptr_t)dlinfo.dli_fbase;
else
base = 0;
image.base = 0;
#endif

return (jl_image_buf_t) {
.kind = JL_IMAGE_KIND_SO,
.pointers = pointers,
.data = data,
.size = *plen,
.base = base,
};
return image;
}

// Allow passing in a module handle directly, rather than a path
Expand Down