Skip to content

Commit 2b6070a

Browse files
chiyutianyi authored and gitster committed
object-file.c: add "stream_loose_object()" to handle large object
If we want to unpack and write a loose object using "write_loose_object", we have to feed it a buffer of the same size as the object, which will consume lots of memory and may cause OOM. This can be improved by feeding data to "stream_loose_object()" in a stream. Add a new function "stream_loose_object()", which is a streaming version of "write_loose_object()" but with a low memory footprint. We will use this function to unpack large blob objects in a later commit. Another difference from "write_loose_object()" is that we have no chance to run "write_object_file_prepare()" to calculate the oid in advance. In "write_loose_object()", we know the oid and we can write the temporary file in the same directory as the final object, but for an object with an undetermined oid, we don't know the exact directory for the object. Still, we need to save the temporary file we're preparing somewhere. We'll do that in the top-level ".git/objects/" directory (or whatever "GIT_OBJECT_DIRECTORY" is set to). Once we've streamed it we'll know the OID, and will move it to its canonical path. "freshen_packed_object()" or "freshen_loose_object()" will be called inside "stream_loose_object()" after obtaining the "oid". After the temporary file is written, we want to mark the object as recent, and we may find that the object already exists. In that case we should remove the temporary file and not leave a new copy of the object. Helped-by: René Scharfe <[email protected]> Helped-by: Ævar Arnfjörð Bjarmason <[email protected]> Helped-by: Jiang Xin <[email protected]> Signed-off-by: Han Xin <[email protected]> Signed-off-by: Ævar Arnfjörð Bjarmason <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 21e7d88 commit 2b6070a

File tree

2 files changed

+112
-0
lines changed

2 files changed

+112
-0
lines changed

object-file.c

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2119,6 +2119,110 @@ static int freshen_packed_object(const struct object_id *oid)
21192119
return 1;
21202120
}
21212121

2122+
int stream_loose_object(struct input_stream *in_stream, size_t len,
2123+
struct object_id *oid)
2124+
{
2125+
int fd, ret, err = 0, flush = 0;
2126+
unsigned char compressed[4096];
2127+
git_zstream stream;
2128+
git_hash_ctx c;
2129+
struct strbuf tmp_file = STRBUF_INIT;
2130+
struct strbuf filename = STRBUF_INIT;
2131+
int dirlen;
2132+
char hdr[MAX_HEADER_LEN];
2133+
int hdrlen;
2134+
2135+
if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
2136+
prepare_loose_object_bulk_checkin();
2137+
2138+
/* Since oid is not determined, save tmp file to odb path. */
2139+
strbuf_addf(&filename, "%s/", get_object_directory());
2140+
hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len);
2141+
2142+
/*
2143+
* Common steps for write_loose_object and stream_loose_object to
2144+
* start writing loose objects:
2145+
*
2146+
* - Create tmpfile for the loose object.
2147+
* - Setup zlib stream for compression.
2148+
* - Start to feed header to zlib stream.
2149+
*/
2150+
fd = start_loose_object_common(&tmp_file, filename.buf, 0,
2151+
&stream, compressed, sizeof(compressed),
2152+
&c, hdr, hdrlen);
2153+
if (fd < 0) {
2154+
err = -1;
2155+
goto cleanup;
2156+
}
2157+
2158+
/* Then the data itself.. */
2159+
do {
2160+
unsigned char *in0 = stream.next_in;
2161+
2162+
if (!stream.avail_in && !in_stream->is_finished) {
2163+
const void *in = in_stream->read(in_stream, &stream.avail_in);
2164+
stream.next_in = (void *)in;
2165+
in0 = (unsigned char *)in;
2166+
/* All data has been read. */
2167+
if (in_stream->is_finished)
2168+
flush = 1;
2169+
}
2170+
ret = write_loose_object_common(&c, &stream, flush, in0, fd,
2171+
compressed, sizeof(compressed));
2172+
/*
2173+
* Unlike write_loose_object(), we do not have the entire
2174+
* buffer. If we get Z_BUF_ERROR due to too few input bytes,
2175+
* then we'll replenish them in the next input_stream->read()
2176+
* call when we loop.
2177+
*/
2178+
} while (ret == Z_OK || ret == Z_BUF_ERROR);
2179+
2180+
if (stream.total_in != len + hdrlen)
2181+
die(_("write stream object %ld != %"PRIuMAX), stream.total_in,
2182+
(uintmax_t)len + hdrlen);
2183+
2184+
/*
2185+
* Common steps for write_loose_object and stream_loose_object to
2186+
* end writing loose oject:
2187+
*
2188+
* - End the compression of zlib stream.
2189+
* - Get the calculated oid.
2190+
*/
2191+
if (ret != Z_STREAM_END)
2192+
die(_("unable to stream deflate new object (%d)"), ret);
2193+
ret = end_loose_object_common(&c, &stream, oid);
2194+
if (ret != Z_OK)
2195+
die(_("deflateEnd on stream object failed (%d)"), ret);
2196+
close_loose_object(fd, tmp_file.buf);
2197+
2198+
if (freshen_packed_object(oid) || freshen_loose_object(oid)) {
2199+
unlink_or_warn(tmp_file.buf);
2200+
goto cleanup;
2201+
}
2202+
2203+
loose_object_path(the_repository, &filename, oid);
2204+
2205+
/* We finally know the object path, and create the missing dir. */
2206+
dirlen = directory_size(filename.buf);
2207+
if (dirlen) {
2208+
struct strbuf dir = STRBUF_INIT;
2209+
strbuf_add(&dir, filename.buf, dirlen);
2210+
2211+
if (mkdir_in_gitdir(dir.buf) && errno != EEXIST) {
2212+
err = error_errno(_("unable to create directory %s"), dir.buf);
2213+
strbuf_release(&dir);
2214+
goto cleanup;
2215+
}
2216+
strbuf_release(&dir);
2217+
}
2218+
2219+
err = finalize_object_file(tmp_file.buf, filename.buf);
2220+
cleanup:
2221+
strbuf_release(&tmp_file);
2222+
strbuf_release(&filename);
2223+
return err;
2224+
}
2225+
21222226
int write_object_file_flags(const void *buf, unsigned long len,
21232227
enum object_type type, struct object_id *oid,
21242228
unsigned flags)

object-store.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ struct object_directory {
4646
char *path;
4747
};
4848

49+
/*
 * A pull-style data source, consumed by stream_loose_object().
 */
struct input_stream {
	/*
	 * Hand out the next chunk of input: returns a pointer to the chunk
	 * and stores the chunk's size in bytes in *len.
	 */
	const void *(*read)(struct input_stream *in_stream, unsigned long *len);

	/* Presumably private state for the read callback; not interpreted by the consumer shown here. */
	void *data;

	/* Non-zero once all data has been read; the consumer then stops calling read(). */
	int is_finished;
};
54+
4955
KHASH_INIT(odb_path_map, const char * /* key: odb_path */,
5056
struct object_directory *, 1, fspathhash, fspatheq)
5157

@@ -269,6 +275,8 @@ static inline int write_object_file(const void *buf, unsigned long len,
269275
int write_object_file_literally(const void *buf, unsigned long len,
270276
const char *type, struct object_id *oid,
271277
unsigned flags);
278+
int stream_loose_object(struct input_stream *in_stream, size_t len,
279+
struct object_id *oid);
272280

273281
/*
274282
* Add an object file to the in-memory object store, without writing it

0 commit comments

Comments
 (0)