Skip to content

Commit cbea8cf

Browse files
xal-0 and gbaraldi authored
System image compression with zstd (#59227)
Revived version of #48244, with a slightly different approach.

This version looks for a function pointer called `jl_image_unpack` inside compiled system images and invokes it to get the `jl_image_buf_t` struct. Two implementations, `jl_image_unpack_zstd` and `jl_image_unpack_uncomp`, are provided (for comparison). The zstd compression is applied only to the heap image, and not the compiled code, since that can be shared across Julia processes.

TODO: test a few different compression settings and enable by default.

Example data from un-trimmed juliac "hello world":

```
156M hello-uncomp
43M hello-zstd
48M hello-zstd-1
45M hello-zstd-5
43M hello-zstd-15
39M hello-zstd-22

$ hyperfine -w3 ./hello-uncomp
Benchmark 1: ./hello-uncomp
  Time (mean ± σ): 74.4 ms ± 0.8 ms [User: 51.9 ms, System: 19.0 ms]
  Range (min … max): 73.0 ms … 76.6 ms 39 runs

$ hyperfine -w3 ./hello-zstd-1
Benchmark 1: ./hello-zstd-1
  Time (mean ± σ): 152.4 ms ± 0.5 ms [User: 138.2 ms, System: 12.0 ms]
  Range (min … max): 151.4 ms … 153.2 ms 19 runs

$ hyperfine -w3 ./hello-zstd-5
Benchmark 1: ./hello-zstd-5
  Time (mean ± σ): 154.3 ms ± 0.5 ms [User: 139.6 ms, System: 12.4 ms]
  Range (min … max): 153.5 ms … 155.2 ms 19 runs

$ hyperfine -w3 ./hello-zstd-15
Benchmark 1: ./hello-zstd-15
  Time (mean ± σ): 135.9 ms ± 0.5 ms [User: 121.6 ms, System: 12.0 ms]
  Range (min … max): 135.1 ms … 136.5 ms 21 runs

$ hyperfine -w3 ./hello-zstd-22
Benchmark 1: ./hello-zstd-22
  Time (mean ± σ): 149.0 ms ± 0.6 ms [User: 134.7 ms, System: 12.1 ms]
  Range (min … max): 147.7 ms … 150.4 ms 19 runs
```

---------

Co-authored-by: Gabriel Baraldi <[email protected]>
1 parent 0c1fab4 commit cbea8cf

File tree

8 files changed

+148
-35
lines changed

8 files changed

+148
-35
lines changed

Make.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1591,7 +1591,7 @@ JLDFLAGS += -Wl,--stack,8388608
15911591
ifeq ($(ARCH),i686)
15921592
JLDFLAGS += -Wl,--large-address-aware
15931593
endif
1594-
JCPPFLAGS += -D_WIN32_WINNT=0x0502
1594+
JCPPFLAGS += -D_WIN32_WINNT=_WIN32_WINNT_WIN8
15951595
UNTRUSTED_SYSTEM_LIBM := 1
15961596
# Use hard links for files on windows, rather than soft links
15971597
# https://stackoverflow.com/questions/3648819/how-to-make-a-symbolic-link-with-cygwin-in-windows-7

base/options.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ struct JLOptions
6767
task_metrics::Int8
6868
timeout_for_safepoint_straggler_s::Int16
6969
gc_sweep_always_full::Int8
70+
compress_sysimage::Int8
7071
end
7172

7273
# This runs early in the sysimage != is not defined yet

base/util.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Unio
245245
if opts.use_sysimage_native_code == 0
246246
push!(addflags, "--sysimage-native-code=no")
247247
end
248+
if opts.compress_sysimage == 1
249+
push!(addflags, "--compress-sysimage=yes")
250+
end
248251
return `$julia -C $cpu_target -J$image_file $addflags`
249252
end
250253

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,7 @@ $(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG
524524
# before attempting this static analysis, so that all necessary headers
525525
# and dependencies are properly installed:
526526
# make -C src install-analysis-deps
527-
ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc
527+
ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc zstd
528528
ifeq ($(OS),Darwin)
529529
ANALYSIS_DEPS += llvmunwind
530530
else ifeq ($(OS),OpenBSD)

src/aotcompile.cpp

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,10 @@
4242
#include <llvm/Support/FormatAdapters.h>
4343
#include <llvm/Linker/Linker.h>
4444

45-
4645
using namespace llvm;
4746

47+
#include <zstd.h>
48+
4849
#include "jitlayers.h"
4950
#include "serialize.h"
5051
#include "julia_assert.h"
@@ -2148,27 +2149,53 @@ void jl_dump_native_impl(void *native_code,
21482149
sysimgM.setDataLayout(DL);
21492150
sysimgM.setStackProtectorGuard(StackProtectorGuard);
21502151
sysimgM.setOverrideStackAlignment(OverrideStackAlignment);
2151-
Constant *data = ConstantDataArray::get(Context,
2152-
ArrayRef<uint8_t>((const unsigned char*)z->buf, z->size));
2152+
2153+
int compression = jl_options.compress_sysimage ? 15 : 0;
2154+
ArrayRef<char> sysimg_data{z->buf, (size_t)z->size};
2155+
SmallVector<char, 0> compressed_data;
2156+
if (compression) {
2157+
compressed_data.resize(ZSTD_compressBound(z->size));
2158+
size_t comp_size = ZSTD_compress(compressed_data.data(), compressed_data.size(),
2159+
z->buf, z->size, compression);
2160+
compressed_data.resize(comp_size);
2161+
sysimg_data = compressed_data;
2162+
ios_close(z);
2163+
free(z);
2164+
}
2165+
2166+
Constant *data = ConstantDataArray::get(Context, sysimg_data);
21532167
auto sysdata = new GlobalVariable(sysimgM, data->getType(), false,
21542168
GlobalVariable::ExternalLinkage,
21552169
data, "jl_system_image_data");
2156-
sysdata->setAlignment(Align(64));
2170+
sysdata->setAlignment(Align(jl_page_size));
21572171
#if JL_LLVM_VERSION >= 180000
21582172
sysdata->setCodeModel(CodeModel::Large);
21592173
#else
21602174
if (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())
21612175
sysdata->setSection(".ldata");
21622176
#endif
21632177
addComdat(sysdata, TheTriple);
2164-
Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size);
2178+
Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), sysimg_data.size());
21652179
addComdat(new GlobalVariable(sysimgM, len->getType(), true,
21662180
GlobalVariable::ExternalLinkage,
21672181
len, "jl_system_image_size"), TheTriple);
2168-
// Free z here, since we've copied out everything into data
2169-
// Results in serious memory savings
2170-
ios_close(z);
2171-
free(z);
2182+
2183+
const char *unpack_func = compression ? "jl_image_unpack_zstd" : "jl_image_unpack_uncomp";
2184+
auto unpack = new GlobalVariable(sysimgM, DL.getIntPtrType(Context), true,
2185+
GlobalVariable::ExternalLinkage, nullptr,
2186+
unpack_func);
2187+
addComdat(new GlobalVariable(sysimgM, PointerType::getUnqual(Context), true,
2188+
GlobalVariable::ExternalLinkage, unpack,
2189+
"jl_image_unpack"),
2190+
TheTriple);
2191+
2192+
if (!compression) {
2193+
// Free z here, since we've copied out everything into data
2194+
// Results in serious memory savings
2195+
ios_close(z);
2196+
free(z);
2197+
}
2198+
compressed_data.clear();
21722199
// Note that we don't set z to null, this allows the check in WRITE_ARCHIVE
21732200
// to function as expected
21742201
// no need to free the module/context, destructor handles that

src/jloptions.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ JL_DLLEXPORT void jl_init_options(void)
160160
0, // task_metrics
161161
-1, // timeout_for_safepoint_straggler_s
162162
0, // gc_sweep_always_full
163+
0, // compress_sysimage
163164
};
164165
jl_options_initialized = 1;
165166
}
@@ -311,7 +312,10 @@ static const char opts_hidden[] =
311312
" --strip-metadata Remove docstrings and source location info from\n"
312313
" system image\n"
313314
" --strip-ir Remove IR (intermediate representation) of compiled\n"
314-
" functions\n\n"
315+
" functions\n"
316+
" --compress-sysimage={yes|no*} Compress the sys/pkgimage heap at the expense of\n"
317+
" slightly increased load time.\n"
318+
"\n"
315319

316320
// compiler debugging and experimental (see the devdocs for tips on using these options)
317321
" --experimental Enable the use of experimental (alpha) features\n"
@@ -407,6 +411,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
407411
opt_permalloc_pkgimg,
408412
opt_trim,
409413
opt_experimental_features,
414+
opt_compress_sysimage,
410415
};
411416
static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:m:";
412417
static const struct option longopts[] = {
@@ -478,6 +483,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
478483
{ "heap-target-increment", required_argument, 0, opt_heap_target_increment },
479484
{ "gc-sweep-always-full", no_argument, 0, opt_gc_sweep_always_full },
480485
{ "trim", optional_argument, 0, opt_trim },
486+
{ "compress-sysimage", required_argument, 0, opt_compress_sysimage },
481487
{ 0, 0, 0, 0 }
482488
};
483489

@@ -1060,6 +1066,12 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
10601066
else
10611067
jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg);
10621068
break;
1069+
case opt_compress_sysimage:
1070+
if (!strcmp(optarg,"yes"))
1071+
jl_options.compress_sysimage = 1;
1072+
else if (!strcmp(optarg,"no"))
1073+
jl_options.compress_sysimage = 0;
1074+
break;
10631075
default:
10641076
jl_errorf("julia: unhandled option -- %c\n"
10651077
"This is a bug, please report it.", c);

src/jloptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ typedef struct {
7171
int8_t task_metrics;
7272
int16_t timeout_for_safepoint_straggler_s;
7373
int8_t gc_sweep_always_full;
74+
int8_t compress_sysimage;
7475
} jl_options_t;
7576

7677
#endif

src/staticdata.c

Lines changed: 92 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,20 @@ External links:
7272
#include <stdio.h> // printf
7373
#include <inttypes.h> // PRIxPTR
7474

75+
#include <zstd.h>
76+
7577
#include "julia.h"
7678
#include "julia_internal.h"
7779
#include "julia_gcext.h"
7880
#include "builtin_proto.h"
7981
#include "processor.h"
8082
#include "serialize.h"
8183

82-
#ifndef _OS_WINDOWS_
84+
#ifdef _OS_WINDOWS_
85+
#include <memoryapi.h>
86+
#else
8387
#include <dlfcn.h>
88+
#include <sys/mman.h>
8489
#endif
8590

8691
#include "valgrind.h"
@@ -3630,14 +3635,75 @@ JL_DLLEXPORT jl_image_buf_t jl_preload_sysimg(const char *fname)
36303635
}
36313636
}
36323637

3633-
// From a shared library handle, verify consistency and return a jl_image_buf_t
3634-
static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
3638+
typedef void jl_image_unpack_func_t(void *handle, jl_image_buf_t *image);
3639+
3640+
static void jl_prefetch_system_image(const char *data, size_t size)
3641+
{
3642+
size_t page_size = jl_getpagesize(); /* jl_page_size is not set yet when loading sysimg */
3643+
void *start = (void *)((uintptr_t)data & ~(page_size - 1));
3644+
size_t size_aligned = LLT_ALIGN(size, page_size);
3645+
#ifdef _OS_WINDOWS_
3646+
WIN32_MEMORY_RANGE_ENTRY entry = {start, size_aligned};
3647+
PrefetchVirtualMemory(GetCurrentProcess(), 1, &entry, 0);
3648+
#else
3649+
madvise(start, size_aligned, MADV_WILLNEED);
3650+
#endif
3651+
}
3652+
3653+
// Unpacker for images whose heap data is stored uncompressed.
// Looks up the size, data and pointer-table symbols in `handle`, stores them in `*image`
// (the data pointer refers directly into the loaded image; nothing is copied), and then
// asks the OS to prefetch the data.
// NOTE(review): the trailing `1` passed to jl_dlsym is presumably "raise on missing
// symbol" -- confirm against jl_dlsym's declaration.
JL_DLLEXPORT void jl_image_unpack_uncomp(void *handle, jl_image_buf_t *image)
{
    size_t *size_sym;

    // Lookup order is kept fixed so that the first missing symbol reported stays the same.
    jl_dlsym(handle, "jl_system_image_size", (void **)&size_sym, 1);
    jl_dlsym(handle, "jl_system_image_data", (void **)&image->data, 1);
    jl_dlsym(handle, "jl_image_pointers", (void**)&image->pointers, 1);

    image->size = *size_sym;
    jl_prefetch_system_image(image->data, image->size);
}
3662+
3663+
// Unpacker for images whose heap data is stored as a single zstd frame.
//
// Looks up the compressed blob (`jl_system_image_data` / `jl_system_image_size`) and the
// pointer table (`jl_image_pointers`) in `handle`, allocates an anonymous read/write
// mapping large enough for the decompressed heap, decompresses into it, and records the
// new buffer and its size in `*image`. Afterwards the whole pages occupied by the
// compressed blob are handed back to the OS.
//
// Any failure (invalid frame header, allocation failure, decompression error) prints a
// message to JL_STDERR and terminates the process via jl_exit(1).
JL_DLLEXPORT void jl_image_unpack_zstd(void *handle, jl_image_buf_t *image)
{
    size_t *plen;
    const char *data;
    jl_dlsym(handle, "jl_system_image_size", (void **)&plen, 1);
    jl_dlsym(handle, "jl_system_image_data", (void **)&data, 1);
    jl_dlsym(handle, "jl_image_pointers", (void **)&image->pointers, 1);
    jl_prefetch_system_image(data, *plen);

    // ZSTD_getFrameContentSize reports failure through sentinel values; using one of
    // those as an allocation size would request an absurd amount of memory.
    unsigned long long content_size = ZSTD_getFrameContentSize(data, *plen);
    if (content_size == ZSTD_CONTENTSIZE_ERROR || content_size == ZSTD_CONTENTSIZE_UNKNOWN ||
        (unsigned long long)(size_t)content_size != content_size) {
        jl_printf(JL_STDERR, "ERROR: system image is not a valid zstd frame of known size\n");
        jl_exit(1);
    }
    image->size = (size_t)content_size;

    size_t page_size = jl_getpagesize(); /* jl_page_size is not set yet when loading sysimg */
    size_t aligned_size = LLT_ALIGN(image->size, page_size);
    char *buf = NULL;
#if defined(_OS_WINDOWS_)
    // Prefer large pages for big images. GetLargePageMinimum() returns 0 when large pages
    // are unsupported, and a MEM_LARGE_PAGES allocation can fail (e.g. when the process
    // lacks the required privilege), so always fall back to a regular allocation.
    size_t large_page_size = GetLargePageMinimum();
    if (large_page_size != 0 && image->size > 4 * large_page_size) {
        buf = (char *)VirtualAlloc(NULL, LLT_ALIGN(image->size, large_page_size),
                                   MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
                                   PAGE_READWRITE);
    }
    if (buf == NULL) {
        buf = (char *)VirtualAlloc(NULL, aligned_size, MEM_COMMIT | MEM_RESERVE,
                                   PAGE_READWRITE);
    }
#else
    buf = (char *)mmap(NULL, aligned_size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (buf == (char *)MAP_FAILED)
        buf = NULL;
#endif
    if (buf == NULL) {
        jl_printf(JL_STDERR, "ERROR: failed to allocate space for system image\n");
        jl_exit(1);
    }
    image->data = buf;

    // A corrupt or truncated blob must not be used as a heap image.
    size_t nbytes = ZSTD_decompress((void *)buf, image->size, data, *plen);
    if (ZSTD_isError(nbytes) || nbytes != image->size) {
        jl_printf(JL_STDERR, "ERROR: failed to decompress system image: %s\n",
                  ZSTD_isError(nbytes) ? ZSTD_getErrorName(nbytes)
                                       : "unexpected decompressed size");
        jl_exit(1);
    }

    // The compressed bytes are no longer needed: release the whole pages they cover.
    // Rounding the length down keeps any trailing page shared with other data mapped.
    // Assumes `data` is page-aligned -- TODO confirm against the image writer.
    size_t len = (*plen) & ~(page_size - 1);
    if (len) {
#ifdef _OS_WINDOWS_
        // NOTE(review): VirtualFree with MEM_RELEASE is documented to require dwSize == 0
        // and an address returned by VirtualAlloc, so this call probably fails (harmlessly)
        // for memory mapped from the image file -- confirm and replace if so.
        VirtualFree((void *)data, len, MEM_RELEASE);
#else
        munmap((void *)data, len);
#endif
    }
}
36403703

3704+
// From a shared library handle, verify consistency and return a jl_image_buf_t
3705+
static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
3706+
{
36413707
// verify that the linker resolved the symbols in this image against ourselves (libjulia-internal)
36423708
void** (*get_jl_RTLD_DEFAULT_handle_addr)(void) = NULL;
36433709
if (handle != jl_RTLD_DEFAULT_handle) {
@@ -3646,38 +3712,41 @@ static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
36463712
jl_error("Image file failed consistency check: maybe opened the wrong version?");
36473713
}
36483714

3715+
jl_image_unpack_func_t **unpack;
3716+
jl_image_buf_t image = {
3717+
.kind = JL_IMAGE_KIND_SO,
3718+
.pointers = NULL,
3719+
.data = NULL,
3720+
.size = 0,
3721+
.base = 0,
3722+
};
3723+
36493724
// verification passed, lookup the buffer pointers
36503725
if (jl_system_image_size == 0 || is_pkgimage) {
36513726
// in the usual case, the sysimage was not statically linked to libjulia-internal
36523727
// look up the external sysimage symbols via the dynamic linker
3653-
jl_dlsym(handle, "jl_system_image_size", (void **)&plen, 1);
3654-
jl_dlsym(handle, "jl_system_image_data", (void **)&data, 1);
3655-
jl_dlsym(handle, "jl_image_pointers", (void**)&pointers, 1);
3656-
} else {
3728+
jl_dlsym(handle, "jl_image_unpack", (void **)&unpack, 1);
3729+
(*unpack)(handle, &image);
3730+
}
3731+
else {
36573732
// the sysimage was statically linked directly against libjulia-internal
36583733
// use the internal symbols
3659-
plen = &jl_system_image_size;
3660-
pointers = &jl_image_pointers;
3661-
data = &jl_system_image_data;
3734+
image.size = jl_system_image_size;
3735+
image.pointers = &jl_image_pointers;
3736+
image.data = &jl_system_image_data;
36623737
}
36633738

36643739
#ifdef _OS_WINDOWS_
3665-
base = (intptr_t)handle;
3740+
image.base = (intptr_t)handle;
36663741
#else
36673742
Dl_info dlinfo;
3668-
if (dladdr((void*)pointers, &dlinfo) != 0)
3669-
base = (intptr_t)dlinfo.dli_fbase;
3743+
if (dladdr((void*)image.pointers, &dlinfo) != 0)
3744+
image.base = (intptr_t)dlinfo.dli_fbase;
36703745
else
3671-
base = 0;
3746+
image.base = 0;
36723747
#endif
36733748

3674-
return (jl_image_buf_t) {
3675-
.kind = JL_IMAGE_KIND_SO,
3676-
.pointers = pointers,
3677-
.data = data,
3678-
.size = *plen,
3679-
.base = base,
3680-
};
3749+
return image;
36813750
}
36823751

36833752
// Allow passing in a module handle directly, rather than a path

0 commit comments

Comments
 (0)