Skip to content

Commit bc30a2f

Browse files
pks-t authored and gitster committed
streaming: move logic to read loose objects streams into backend
Move the logic to read loose object streams into the respective subsystem. This allows us to make a couple of function declarations private. Signed-off-by: Patrick Steinhardt <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent ffc9a34 commit bc30a2f

File tree

3 files changed

+164
-178
lines changed

3 files changed

+164
-178
lines changed

object-file.c

Lines changed: 158 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
234234
return map;
235235
}
236236

237-
void *odb_source_loose_map_object(struct odb_source *source,
238-
const struct object_id *oid,
239-
unsigned long *size)
237+
static void *odb_source_loose_map_object(struct odb_source *source,
238+
const struct object_id *oid,
239+
unsigned long *size)
240240
{
241241
const char *p;
242242
int fd = open_loose_object(source->loose, oid, &p);
@@ -246,11 +246,29 @@ void *odb_source_loose_map_object(struct odb_source *source,
246246
return map_fd(fd, p, size);
247247
}
248248

249-
enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
250-
unsigned char *map,
251-
unsigned long mapsize,
252-
void *buffer,
253-
unsigned long bufsiz)
249+
/* Outcome of unpack_loose_header(). */
enum unpack_loose_header_result {
	ULHR_OK = 0,		/* success */
	ULHR_BAD = 1,		/* error */
	ULHR_TOO_LONG = 2,	/* the header was too long */
};
254+
255+
/**
256+
* unpack_loose_header() initializes the data stream needed to unpack
257+
* a loose object header.
258+
*
259+
* Returns:
260+
*
261+
* - ULHR_OK on success
262+
* - ULHR_BAD on error
263+
* - ULHR_TOO_LONG if the header was too long
264+
*
265+
* It will only parse up to MAX_HEADER_LEN bytes.
266+
*/
267+
static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
268+
unsigned char *map,
269+
unsigned long mapsize,
270+
void *buffer,
271+
unsigned long bufsiz)
254272
{
255273
int status;
256274

@@ -329,11 +347,18 @@ static void *unpack_loose_rest(git_zstream *stream,
329347
}
330348

331349
/*
350+
* parse_loose_header() parses the starting "<type> <len>\0" of an
351+
* object. If it doesn't follow that format -1 is returned. To check
352+
* the validity of the <type> populate the "typep" in the "struct
353+
* object_info". It will be OBJ_BAD if the object type is unknown. The
354+
* parsed <len> can be retrieved via "oi->sizep", and from there
355+
* passed to unpack_loose_rest().
356+
*
332357
* We used to just use "sscanf()", but that's actually way
333358
* too permissive for what we want to check. So do an anal
334359
* object header parse by hand.
335360
*/
336-
int parse_loose_header(const char *hdr, struct object_info *oi)
361+
static int parse_loose_header(const char *hdr, struct object_info *oi)
337362
{
338363
const char *type_buf = hdr;
339364
size_t size;
@@ -1976,3 +2001,127 @@ void odb_source_loose_free(struct odb_source_loose *loose)
19762001
loose_object_map_clear(&loose->map);
19772002
free(loose);
19782003
}
2004+
2005+
struct odb_loose_read_stream {
2006+
struct odb_read_stream base;
2007+
git_zstream z;
2008+
enum {
2009+
ODB_LOOSE_READ_STREAM_INUSE,
2010+
ODB_LOOSE_READ_STREAM_DONE,
2011+
ODB_LOOSE_READ_STREAM_ERROR,
2012+
} z_state;
2013+
void *mapped;
2014+
unsigned long mapsize;
2015+
char hdr[32];
2016+
int hdr_avail;
2017+
int hdr_used;
2018+
};
2019+
2020+
static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
2021+
{
2022+
struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
2023+
size_t total_read = 0;
2024+
2025+
switch (st->z_state) {
2026+
case ODB_LOOSE_READ_STREAM_DONE:
2027+
return 0;
2028+
case ODB_LOOSE_READ_STREAM_ERROR:
2029+
return -1;
2030+
default:
2031+
break;
2032+
}
2033+
2034+
if (st->hdr_used < st->hdr_avail) {
2035+
size_t to_copy = st->hdr_avail - st->hdr_used;
2036+
if (sz < to_copy)
2037+
to_copy = sz;
2038+
memcpy(buf, st->hdr + st->hdr_used, to_copy);
2039+
st->hdr_used += to_copy;
2040+
total_read += to_copy;
2041+
}
2042+
2043+
while (total_read < sz) {
2044+
int status;
2045+
2046+
st->z.next_out = (unsigned char *)buf + total_read;
2047+
st->z.avail_out = sz - total_read;
2048+
status = git_inflate(&st->z, Z_FINISH);
2049+
2050+
total_read = st->z.next_out - (unsigned char *)buf;
2051+
2052+
if (status == Z_STREAM_END) {
2053+
git_inflate_end(&st->z);
2054+
st->z_state = ODB_LOOSE_READ_STREAM_DONE;
2055+
break;
2056+
}
2057+
if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
2058+
git_inflate_end(&st->z);
2059+
st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
2060+
return -1;
2061+
}
2062+
}
2063+
return total_read;
2064+
}
2065+
2066+
static int close_istream_loose(struct odb_read_stream *_st)
2067+
{
2068+
struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
2069+
if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
2070+
git_inflate_end(&st->z);
2071+
munmap(st->mapped, st->mapsize);
2072+
return 0;
2073+
}
2074+
2075+
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
2076+
struct odb_source *source,
2077+
const struct object_id *oid)
2078+
{
2079+
struct object_info oi = OBJECT_INFO_INIT;
2080+
struct odb_loose_read_stream *st;
2081+
unsigned long mapsize;
2082+
void *mapped;
2083+
2084+
mapped = odb_source_loose_map_object(source, oid, &mapsize);
2085+
if (!mapped)
2086+
return -1;
2087+
2088+
/*
2089+
* Note: we must allocate this structure early even though we may still
2090+
* fail. This is because we need to initialize the zlib stream, and it
2091+
* is not possible to copy the stream around after the fact because it
2092+
* has self-referencing pointers.
2093+
*/
2094+
CALLOC_ARRAY(st, 1);
2095+
2096+
switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
2097+
sizeof(st->hdr))) {
2098+
case ULHR_OK:
2099+
break;
2100+
case ULHR_BAD:
2101+
case ULHR_TOO_LONG:
2102+
goto error;
2103+
}
2104+
2105+
oi.sizep = &st->base.size;
2106+
oi.typep = &st->base.type;
2107+
2108+
if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
2109+
goto error;
2110+
2111+
st->mapped = mapped;
2112+
st->mapsize = mapsize;
2113+
st->hdr_used = strlen(st->hdr) + 1;
2114+
st->hdr_avail = st->z.total_out;
2115+
st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
2116+
st->base.close = close_istream_loose;
2117+
st->base.read = read_istream_loose;
2118+
2119+
*out = &st->base;
2120+
2121+
return 0;
2122+
error:
2123+
git_inflate_end(&st->z);
2124+
munmap(st->mapped, st->mapsize);
2125+
free(st);
2126+
return -1;
2127+
}

object-file.h

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ enum {
1616
int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
1717
int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
1818

19+
struct object_info;
20+
struct odb_read_stream;
1921
struct odb_source;
2022

2123
struct odb_source_loose {
@@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source,
4749
const struct object_id *oid,
4850
struct object_info *oi, int flags);
4951

50-
void *odb_source_loose_map_object(struct odb_source *source,
51-
const struct object_id *oid,
52-
unsigned long *size);
52+
/*
 * Open a read stream for the loose object `oid` in `source`.
 *
 * On success 0 is returned and the new stream is stored in `*out`. On
 * failure -1 is returned.
 */
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
53+
struct odb_source *source,
54+
const struct object_id *oid);
5355

5456
/*
5557
* Return true iff an object database source has a loose object
@@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb,
143145
int format_object_header(char *str, size_t size, enum object_type type,
144146
size_t objsize);
145147

146-
/**
147-
* unpack_loose_header() initializes the data stream needed to unpack
148-
* a loose object header.
149-
*
150-
* Returns:
151-
*
152-
* - ULHR_OK on success
153-
* - ULHR_BAD on error
154-
* - ULHR_TOO_LONG if the header was too long
155-
*
156-
* It will only parse up to MAX_HEADER_LEN bytes.
157-
*/
158-
enum unpack_loose_header_result {
159-
ULHR_OK,
160-
ULHR_BAD,
161-
ULHR_TOO_LONG,
162-
};
163-
enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
164-
unsigned char *map,
165-
unsigned long mapsize,
166-
void *buffer,
167-
unsigned long bufsiz);
168-
169-
/**
170-
* parse_loose_header() parses the starting "<type> <len>\0" of an
171-
* object. If it doesn't follow that format -1 is returned. To check
172-
* the validity of the <type> populate the "typep" in the "struct
173-
* object_info". It will be OBJ_BAD if the object type is unknown. The
174-
* parsed <len> can be retrieved via "oi->sizep", and from there
175-
* passed to unpack_loose_rest().
176-
*/
177-
struct object_info;
178-
int parse_loose_header(const char *hdr, struct object_info *oi);
179-
180148
int force_object_loose(struct odb_source *source,
181149
const struct object_id *oid, time_t mtime);
182150

0 commit comments

Comments
 (0)