Skip to content

Commit 5316f76

Browse files
committed
Merge branch 'ew/cat-file-optim' into jch

"git cat-file --batch" has been optimized.

* ew/cat-file-optim:
  cat-file: use writev(2) if available
  cat-file: batch_write: use size_t for length
  cat-file: batch-command uses content_limit
  object_info: content_limit only applies to blobs
  packfile: packed_object_info avoids packed_to_object_type
  cat-file: use delta_base_cache entries directly
  packfile: inline cache_or_unpack_entry
  packfile: fix off-by-one in content_limit comparison
  packfile: allow content-limit for cat-file
  packfile: move sizep computation
2 parents dc774bc + f043683 commit 5316f76

File tree

13 files changed

+312
-83
lines changed

13 files changed

+312
-83
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,6 +1865,9 @@ ifdef NO_PREAD
18651865
COMPAT_CFLAGS += -DNO_PREAD
18661866
COMPAT_OBJS += compat/pread.o
18671867
endif
1868+
ifdef HAVE_WRITEV
1869+
COMPAT_CFLAGS += -DHAVE_WRITEV
1870+
endif
18681871
ifdef NO_FAST_WORKING_DIRECTORY
18691872
BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
18701873
endif

builtin/cat-file.c

Lines changed: 87 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ struct expand_data {
280280
off_t disk_size;
281281
const char *rest;
282282
struct object_id delta_base_oid;
283+
struct git_iovec iov[3];
283284

284285
/*
285286
* If mark_query is true, we do not expand anything, but rather
@@ -368,7 +369,7 @@ static void expand_format(struct strbuf *sb, const char *start,
368369
}
369370
}
370371

371-
static void batch_write(struct batch_options *opt, const void *data, int len)
372+
static void batch_write(struct batch_options *opt, const void *data, size_t len)
372373
{
373374
if (opt->buffer_output) {
374375
if (fwrite(data, 1, len, stdout) != len)
@@ -377,15 +378,72 @@ static void batch_write(struct batch_options *opt, const void *data, int len)
377378
write_or_die(1, data, len);
378379
}
379380

380-
static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
381+
static void batch_writev(struct batch_options *opt, struct expand_data *data,
382+
const struct strbuf *hdr, size_t size)
383+
{
384+
data->iov[0].iov_base = hdr->buf;
385+
data->iov[0].iov_len = hdr->len;
386+
data->iov[1].iov_len = size;
387+
388+
/*
389+
* Copying a (8|16)-byte iovec for a single byte is gross, but my
390+
* attempt to stuff output_delim into the trailing NUL byte of
391+
* iov[1].iov_base (and restoring it after writev(2) for the
392+
* OI_DBCACHED case) to drop iovcnt from 3->2 wasn't faster.
393+
*/
394+
data->iov[2].iov_base = &opt->output_delim;
395+
data->iov[2].iov_len = 1;
396+
397+
if (opt->buffer_output)
398+
fwritev_or_die(stdout, data->iov, 3);
399+
else
400+
writev_or_die(1, data->iov, 3);
401+
402+
/* writev_or_die may move iov[1].iov_base, so it's invalid */
403+
data->iov[1].iov_base = NULL;
404+
}
405+
406+
static void print_object_or_die(struct batch_options *opt,
407+
struct expand_data *data, struct strbuf *hdr)
381408
{
382409
const struct object_id *oid = &data->oid;
383410

384411
assert(data->info.typep);
385412

386-
if (data->type == OBJ_BLOB) {
387-
if (opt->buffer_output)
388-
fflush(stdout);
413+
if (data->iov[1].iov_base) {
414+
void *content = data->iov[1].iov_base;
415+
unsigned long size = data->size;
416+
417+
if (use_mailmap && (data->type == OBJ_COMMIT ||
418+
data->type == OBJ_TAG)) {
419+
size_t s = size;
420+
421+
if (data->info.whence == OI_DBCACHED) {
422+
content = xmemdupz(content, s);
423+
data->info.whence = OI_PACKED;
424+
}
425+
426+
content = replace_idents_using_mailmap(content, &s);
427+
data->iov[1].iov_base = content;
428+
size = cast_size_t_to_ulong(s);
429+
}
430+
batch_writev(opt, data, hdr, size);
431+
switch (data->info.whence) {
432+
case OI_CACHED:
433+
/*
434+
* only blame uses OI_CACHED atm, so it's unlikely
435+
* we'll ever hit this path
436+
*/
437+
BUG("TODO OI_CACHED support not done");
438+
case OI_LOOSE:
439+
case OI_PACKED:
440+
free(content);
441+
break;
442+
case OI_DBCACHED:
443+
unlock_delta_base_cache();
444+
}
445+
} else {
446+
assert(data->type == OBJ_BLOB);
389447
if (opt->transform_mode) {
390448
char *contents;
391449
unsigned long size;
@@ -412,36 +470,17 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
412470
oid_to_hex(oid), data->rest);
413471
} else
414472
BUG("invalid transform_mode: %c", opt->transform_mode);
415-
batch_write(opt, contents, size);
473+
data->iov[1].iov_base = contents;
474+
batch_writev(opt, data, hdr, size);
416475
free(contents);
417476
} else {
477+
batch_write(opt, hdr->buf, hdr->len);
478+
if (opt->buffer_output)
479+
fflush(stdout);
418480
stream_blob(oid);
481+
batch_write(opt, &opt->output_delim, 1);
419482
}
420483
}
421-
else {
422-
enum object_type type;
423-
unsigned long size;
424-
void *contents;
425-
426-
contents = repo_read_object_file(the_repository, oid, &type,
427-
&size);
428-
if (!contents)
429-
die("object %s disappeared", oid_to_hex(oid));
430-
431-
if (use_mailmap) {
432-
size_t s = size;
433-
contents = replace_idents_using_mailmap(contents, &s);
434-
size = cast_size_t_to_ulong(s);
435-
}
436-
437-
if (type != data->type)
438-
die("object %s changed type!?", oid_to_hex(oid));
439-
if (data->info.sizep && size != data->size && !use_mailmap)
440-
die("object %s changed size!?", oid_to_hex(oid));
441-
442-
batch_write(opt, contents, size);
443-
free(contents);
444-
}
445484
}
446485

447486
static void print_default_format(struct strbuf *scratch, struct expand_data *data,
@@ -508,12 +547,10 @@ static void batch_object_write(const char *obj_name,
508547
strbuf_addch(scratch, opt->output_delim);
509548
}
510549

511-
batch_write(opt, scratch->buf, scratch->len);
512-
513-
if (opt->batch_mode == BATCH_MODE_CONTENTS) {
514-
print_object_or_die(opt, data);
515-
batch_write(opt, &opt->output_delim, 1);
516-
}
550+
if (opt->batch_mode == BATCH_MODE_CONTENTS)
551+
print_object_or_die(opt, data, scratch);
552+
else
553+
batch_write(opt, scratch->buf, scratch->len);
517554
}
518555

519556
static void batch_one_object(const char *obj_name,
@@ -655,6 +692,7 @@ static void parse_cmd_contents(struct batch_options *opt,
655692
struct expand_data *data)
656693
{
657694
opt->batch_mode = BATCH_MODE_CONTENTS;
695+
data->info.contentp = &data->iov[1].iov_base;
658696
batch_one_object(line, output, opt, data);
659697
}
660698

@@ -664,6 +702,7 @@ static void parse_cmd_info(struct batch_options *opt,
664702
struct expand_data *data)
665703
{
666704
opt->batch_mode = BATCH_MODE_INFO;
705+
data->info.contentp = NULL;
667706
batch_one_object(line, output, opt, data);
668707
}
669708

@@ -801,9 +840,20 @@ static int batch_objects(struct batch_options *opt)
801840
/*
802841
* If we are printing out the object, then always fill in the type,
803842
* since we will want to decide whether or not to stream.
843+
*
844+
* Likewise, grab the content in the initial request if it's small
845+
* and we're not planning to filter it.
804846
*/
805-
if (opt->batch_mode == BATCH_MODE_CONTENTS)
847+
if ((opt->batch_mode == BATCH_MODE_CONTENTS) ||
848+
(opt->batch_mode == BATCH_MODE_QUEUE_AND_DISPATCH)) {
806849
data.info.typep = &data.type;
850+
if (!opt->transform_mode) {
851+
data.info.sizep = &data.size;
852+
data.info.contentp = &data.iov[1].iov_base;
853+
data.info.content_limit = big_file_threshold;
854+
data.info.direct_cache = 1;
855+
}
856+
}
807857

808858
if (opt->all_objects) {
809859
struct object_cb_data cb;

config.mak.uname

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ ifeq ($(uname_S),Linux)
6868
BASIC_CFLAGS += -std=c99
6969
endif
7070
LINK_FUZZ_PROGRAMS = YesPlease
71+
HAVE_WRITEV = YesPlease
7172
endif
7273
ifeq ($(uname_S),GNU/kFreeBSD)
7374
HAVE_ALLOCA_H = YesPlease
@@ -76,6 +77,7 @@ ifeq ($(uname_S),GNU/kFreeBSD)
7677
DIR_HAS_BSD_GROUP_SEMANTICS = YesPlease
7778
LIBC_CONTAINS_LIBINTL = YesPlease
7879
FREAD_READS_DIRECTORIES = UnfortunatelyYes
80+
HAVE_WRITEV = YesPlease
7981
endif
8082
ifeq ($(uname_S),UnixWare)
8183
CC = cc
@@ -292,6 +294,7 @@ ifeq ($(uname_S),FreeBSD)
292294
PAGER_ENV = LESS=FRX LV=-c MORE=FRX
293295
FREAD_READS_DIRECTORIES = UnfortunatelyYes
294296
FILENO_IS_A_MACRO = UnfortunatelyYes
297+
HAVE_WRITEV = YesPlease
295298
endif
296299
ifeq ($(uname_S),OpenBSD)
297300
NO_STRCASESTR = YesPlease
@@ -307,6 +310,7 @@ ifeq ($(uname_S),OpenBSD)
307310
PROCFS_EXECUTABLE_PATH = /proc/curproc/file
308311
FREAD_READS_DIRECTORIES = UnfortunatelyYes
309312
FILENO_IS_A_MACRO = UnfortunatelyYes
313+
HAVE_WRITEV = YesPlease
310314
endif
311315
ifeq ($(uname_S),MirBSD)
312316
NO_STRCASESTR = YesPlease
@@ -329,6 +333,7 @@ ifeq ($(uname_S),NetBSD)
329333
HAVE_BSD_KERN_PROC_SYSCTL = YesPlease
330334
CSPRNG_METHOD = arc4random
331335
PROCFS_EXECUTABLE_PATH = /proc/curproc/exe
336+
HAVE_WRITEV = YesPlease
332337
endif
333338
ifeq ($(uname_S),AIX)
334339
DEFAULT_PAGER = more

git-compat-util.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,16 @@ static inline int git_setitimer(int which UNUSED,
401401
#define setitimer(which,value,ovalue) git_setitimer(which,value,ovalue)
402402
#endif
403403

404+
#ifdef HAVE_WRITEV
405+
#include <sys/uio.h>
406+
#define git_iovec iovec
407+
#else /* !HAVE_WRITEV */
408+
struct git_iovec {
409+
void *iov_base;
410+
size_t iov_len;
411+
};
412+
#endif /* !HAVE_WRITEV */
413+
404414
#ifndef NO_LIBGEN_H
405415
#include <libgen.h>
406416
#else

object-file.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,13 @@ static int loose_object_info(struct repository *r,
15251525

15261526
if (!oi->contentp)
15271527
break;
1528+
if (oi->content_limit && *oi->typep == OBJ_BLOB &&
1529+
*oi->sizep > oi->content_limit) {
1530+
git_inflate_end(&stream);
1531+
oi->contentp = NULL;
1532+
goto cleanup;
1533+
}
1534+
15281535
*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
15291536
if (*oi->contentp)
15301537
goto cleanup;
@@ -1613,6 +1620,11 @@ static int do_oid_object_info_extended(struct repository *r,
16131620
oidclr(oi->delta_base_oid, the_repository->hash_algo);
16141621
if (oi->type_name)
16151622
strbuf_addstr(oi->type_name, type_name(co->type));
1623+
/*
1624+
* Currently `blame' is the only command which creates
1625+
* OI_CACHED, and direct_cache is only used by `cat-file'.
1626+
*/
1627+
assert(!oi->direct_cache);
16161628
if (oi->contentp)
16171629
*oi->contentp = xmemdupz(co->buf, co->size);
16181630
oi->whence = OI_CACHED;

object-store-ll.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ struct object_info {
304304
struct object_id *delta_base_oid;
305305
struct strbuf *type_name;
306306
void **contentp;
307+
size_t content_limit;
307308

308309
/* Response */
309310
enum {
@@ -312,6 +313,14 @@ struct object_info {
312313
OI_PACKED,
313314
OI_DBCACHED
314315
} whence;
316+
317+
/*
318+
* Set if caller is able to use OI_DBCACHED entries without copying.
319+
* This only applies to OI_DBCACHED entries at the moment,
320+
* not OI_CACHED or any other type of entry.
321+
*/
322+
unsigned direct_cache:1;
323+
315324
union {
316325
/*
317326
* struct {

0 commit comments

Comments
 (0)