-
Notifications
You must be signed in to change notification settings - Fork 343
fix: Replace in-memory bspatch output buffer with a memory-mapped tem… #3554
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
22f616f
85f906b
55f9542
6d4824f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ | |
| #include "config.h" | ||
|
|
||
| #include <string.h> | ||
| #include <sys/mman.h> | ||
|
|
||
| #include <gio/gfiledescriptorbased.h> | ||
| #include <gio/gunixinputstream.h> | ||
|
|
@@ -414,7 +415,39 @@ dispatch_bspatch (OstreeRepo *repo, StaticDeltaExecutionState *state, GCancellab | |
| if (!input_mfile) | ||
| return FALSE; | ||
|
|
||
| g_autofree guchar *buf = g_malloc0 (state->content_size); | ||
| /* Instead of allocating the entire content_size in memory, use a tmpfile | ||
| * that can be paged to disk by the OS if needed. This prevents OOM on | ||
| * systems with limited RAM when processing large files. | ||
| * See: https://github.com/flatpak/flatpak/issues/6255 | ||
| */ | ||
| void *buf; | ||
| g_auto (GLnxTmpfile) tmpf = { | ||
| 0, | ||
| }; | ||
|
|
||
| if (state->content_size > 0) | ||
| { | ||
| if (!glnx_open_anonymous_tmpfile (O_RDWR | O_CLOEXEC, &tmpf, error)) | ||
| return FALSE; | ||
|
|
||
| /* Resize tmpfile to content_size */ | ||
| if (ftruncate (tmpf.fd, state->content_size) < 0) | ||
| return glnx_throw_errno_prefix (error, "ftruncate"); | ||
|
|
||
| /* Memory-map the tmpfile for bspatch to write to */ | ||
| buf = mmap (NULL, state->content_size, PROT_READ | PROT_WRITE, MAP_SHARED, tmpf.fd, 0); | ||
| if (buf == MAP_FAILED) | ||
| return glnx_throw_errno_prefix (error, "mmap"); | ||
| } | ||
| else | ||
| { | ||
| /* mmap() with a zero size fails with EINVAL. The old code with | ||
| * g_malloc0(0) worked by returning a valid pointer. bspatch might | ||
| * not like a NULL buffer, so provide a dummy buffer. | ||
| */ | ||
| static guchar dummy_buf; | ||
| buf = &dummy_buf; | ||
| } | ||
|
|
||
| struct bzpatch_opaque_s opaque; | ||
| opaque.state = state; | ||
|
|
@@ -423,14 +456,48 @@ dispatch_bspatch (OstreeRepo *repo, StaticDeltaExecutionState *state, GCancellab | |
| struct bspatch_stream stream; | ||
| stream.read = bspatch_read; | ||
| stream.opaque = &opaque; | ||
| if (bspatch ((const guint8 *)g_mapped_file_get_contents (input_mfile), | ||
| g_mapped_file_get_length (input_mfile), buf, state->content_size, &stream) | ||
| < 0) | ||
|
|
||
| int bspatch_result | ||
| = bspatch ((const guint8 *)g_mapped_file_get_contents (input_mfile), | ||
| g_mapped_file_get_length (input_mfile), buf, state->content_size, &stream); | ||
|
|
||
| /* Unmap before checking result - only if we actually created a mapping */ | ||
| if (state->content_size > 0 && munmap (buf, state->content_size) < 0) | ||
| return glnx_throw_errno_prefix (error, "munmap"); | ||
|
|
||
| if (bspatch_result < 0) | ||
| return glnx_throw (error, "bsdiff patch failed"); | ||
|
|
||
| if (!_ostree_repo_bare_content_write (repo, &state->content_out, buf, state->content_size, | ||
| cancellable, error)) | ||
| return FALSE; | ||
| /* Now read from tmpfile and write to repository (if content_size > 0) */ | ||
| if (state->content_size > 0) | ||
| { | ||
| if (lseek (tmpf.fd, 0, SEEK_SET) < 0) | ||
| return glnx_throw_errno_prefix (error, "lseek"); | ||
|
|
||
| /* Read and write in chunks to avoid loading entire file into memory */ | ||
| guint64 bytes_remaining = state->content_size; | ||
| while (bytes_remaining > 0) | ||
| { | ||
| guchar chunk_buf[8192]; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have you considered bigger block sizes like experimented with in |
||
| gsize chunk_size = MIN (sizeof (chunk_buf), bytes_remaining); | ||
| gssize bytes_read; | ||
|
|
||
| do | ||
| bytes_read = read (tmpf.fd, chunk_buf, chunk_size); | ||
| while (G_UNLIKELY (bytes_read == -1 && errno == EINTR)); | ||
|
Comment on lines
+485
to
+487
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While reading, this chunk should not be "long enough" for a badly placed interrupt, you are going to retry it. Can you consider also patching |
||
|
|
||
| if (bytes_read < 0) | ||
| return glnx_throw_errno_prefix (error, "read"); | ||
| if (bytes_read == 0) | ||
| return glnx_throw (error, "Unexpected EOF reading from tmpfile"); | ||
|
|
||
| if (!_ostree_repo_bare_content_write (repo, &state->content_out, chunk_buf, | ||
| bytes_read, cancellable, error)) | ||
| return FALSE; | ||
|
|
||
| bytes_remaining -= bytes_read; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return TRUE; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why was this done? Previously, the (potentially) large output buffer was used to be written into the repository via
_ostree_repo_bare_content_write. Now, this code loads the patched content chunk-based into memory and then writes it to disk. But, as the source was changed to be potentially memory-backed, this is not needed - I would even say it introduced an unnecessary copy?Furthermore, the
glnx_loop_writepart of_ostree_repo_bare_content_writenow may performs additional calls towrite, as the chuck sizes may not align with its retry-logic as from https://github.com/GNOME/libglnx/blob/de29c5f7d9df8d57b4f5caa9920f5d4caa7a8cfc/glnx-fdio.c#L752