Skip to content

Commit b5a2d6c

Browse files
committed
Merge branch 'rs/archive-with-internal-gzip'
Teach "git archive" to (optionally and then by default) avoid spawning an external "gzip" process when creating ".tar.gz" (and ".tgz") archives.

* rs/archive-with-internal-gzip:
  archive-tar: use internal gzip by default
  archive-tar: use OS_CODE 3 (Unix) for internal gzip
  archive-tar: add internal gzip implementation
  archive-tar: factor out write_block()
  archive: rename archiver data field to filter_command
  archive: update format documentation
2 parents c2d0109 + 4f4be00 commit b5a2d6c

File tree

4 files changed

+100
-28
lines changed

4 files changed

+100
-28
lines changed

Documentation/git-archive.txt

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,12 @@ OPTIONS
3434
-------
3535

3636
--format=<fmt>::
37-
Format of the resulting archive: 'tar' or 'zip'. If this option
37+
Format of the resulting archive. Possible values are `tar`,
38+
`zip`, `tar.gz`, `tgz`, and any format defined using the
39+
configuration option `tar.<format>.command`. If `--format`
3840
is not given, and the output file is specified, the format is
39-
inferred from the filename if possible (e.g. writing to "foo.zip"
40-
makes the output to be in the zip format). Otherwise the output
41+
inferred from the filename if possible (e.g. writing to `foo.zip`
42+
makes the output to be in the `zip` format). Otherwise the output
4143
format is `tar`.
4244

4345
-l::
@@ -143,17 +145,16 @@ tar.<format>.command::
143145
is executed using the shell with the generated tar file on its
144146
standard input, and should produce the final output on its
145147
standard output. Any compression-level options will be passed
146-
to the command (e.g., "-9"). An output file with the same
147-
extension as `<format>` will be use this format if no other
148-
format is given.
148+
to the command (e.g., `-9`).
149149
+
150-
The "tar.gz" and "tgz" formats are defined automatically and default to
151-
`gzip -cn`. You may override them with custom commands.
150+
The `tar.gz` and `tgz` formats are defined automatically and use the
151+
magic command `git archive gzip` by default, which invokes an internal
152+
implementation of gzip.
152153

153154
tar.<format>.remote::
154-
If true, enable `<format>` for use by remote clients via
155+
If true, enable the format for use by remote clients via
155156
linkgit:git-upload-archive[1]. Defaults to false for
156-
user-defined formats, but true for the "tar.gz" and "tgz"
157+
user-defined formats, but true for the `tar.gz` and `tgz`
157158
formats.
158159

159160
[[ATTRIBUTES]]

archive-tar.c

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,18 @@ static int write_tar_filter_archive(const struct archiver *ar,
3838
#define USTAR_MAX_MTIME 077777777777ULL
3939
#endif
4040

41+
static void tar_write_block(const void *buf)
42+
{
43+
write_or_die(1, buf, BLOCKSIZE);
44+
}
45+
46+
static void (*write_block)(const void *) = tar_write_block;
47+
4148
/* writes out the whole block, but only if it is full */
4249
static void write_if_needed(void)
4350
{
4451
if (offset == BLOCKSIZE) {
45-
write_or_die(1, block, BLOCKSIZE);
52+
write_block(block);
4653
offset = 0;
4754
}
4855
}
@@ -66,7 +73,7 @@ static void do_write_blocked(const void *data, unsigned long size)
6673
write_if_needed();
6774
}
6875
while (size >= BLOCKSIZE) {
69-
write_or_die(1, buf, BLOCKSIZE);
76+
write_block(buf);
7077
size -= BLOCKSIZE;
7178
buf += BLOCKSIZE;
7279
}
@@ -101,10 +108,10 @@ static void write_trailer(void)
101108
{
102109
int tail = BLOCKSIZE - offset;
103110
memset(block + offset, 0, tail);
104-
write_or_die(1, block, BLOCKSIZE);
111+
write_block(block);
105112
if (tail < 2 * RECORDSIZE) {
106113
memset(block, 0, offset);
107-
write_or_die(1, block, BLOCKSIZE);
114+
write_block(block);
108115
}
109116
}
110117

@@ -383,8 +390,8 @@ static int tar_filter_config(const char *var, const char *value, void *data)
383390
if (!strcmp(type, "command")) {
384391
if (!value)
385392
return config_error_nonbool(var);
386-
free(ar->data);
387-
ar->data = xstrdup(value);
393+
free(ar->filter_command);
394+
ar->filter_command = xstrdup(value);
388395
return 0;
389396
}
390397
if (!strcmp(type, "remote")) {
@@ -425,17 +432,65 @@ static int write_tar_archive(const struct archiver *ar,
425432
return err;
426433
}
427434

435+
static git_zstream gzstream;
436+
static unsigned char outbuf[16384];
437+
438+
static void tgz_deflate(int flush)
439+
{
440+
while (gzstream.avail_in || flush == Z_FINISH) {
441+
int status = git_deflate(&gzstream, flush);
442+
if (!gzstream.avail_out || status == Z_STREAM_END) {
443+
write_or_die(1, outbuf, gzstream.next_out - outbuf);
444+
gzstream.next_out = outbuf;
445+
gzstream.avail_out = sizeof(outbuf);
446+
if (status == Z_STREAM_END)
447+
break;
448+
}
449+
if (status != Z_OK && status != Z_BUF_ERROR)
450+
die(_("deflate error (%d)"), status);
451+
}
452+
}
453+
454+
static void tgz_write_block(const void *data)
455+
{
456+
gzstream.next_in = (void *)data;
457+
gzstream.avail_in = BLOCKSIZE;
458+
tgz_deflate(Z_NO_FLUSH);
459+
}
460+
461+
static const char internal_gzip_command[] = "git archive gzip";
462+
428463
static int write_tar_filter_archive(const struct archiver *ar,
429464
struct archiver_args *args)
430465
{
466+
#if ZLIB_VERNUM >= 0x1221
467+
struct gz_header_s gzhead = { .os = 3 }; /* Unix, for reproducibility */
468+
#endif
431469
struct strbuf cmd = STRBUF_INIT;
432470
struct child_process filter = CHILD_PROCESS_INIT;
433471
int r;
434472

435-
if (!ar->data)
473+
if (!ar->filter_command)
436474
BUG("tar-filter archiver called with no filter defined");
437475

438-
strbuf_addstr(&cmd, ar->data);
476+
if (!strcmp(ar->filter_command, internal_gzip_command)) {
477+
write_block = tgz_write_block;
478+
git_deflate_init_gzip(&gzstream, args->compression_level);
479+
#if ZLIB_VERNUM >= 0x1221
480+
if (deflateSetHeader(&gzstream.z, &gzhead) != Z_OK)
481+
BUG("deflateSetHeader() called too late");
482+
#endif
483+
gzstream.next_out = outbuf;
484+
gzstream.avail_out = sizeof(outbuf);
485+
486+
r = write_tar_archive(ar, args);
487+
488+
tgz_deflate(Z_FINISH);
489+
git_deflate_end(&gzstream);
490+
return r;
491+
}
492+
493+
strbuf_addstr(&cmd, ar->filter_command);
439494
if (args->compression_level >= 0)
440495
strbuf_addf(&cmd, " -%d", args->compression_level);
441496

@@ -471,14 +526,14 @@ void init_tar_archiver(void)
471526
int i;
472527
register_archiver(&tar_archiver);
473528

474-
tar_filter_config("tar.tgz.command", "gzip -cn", NULL);
529+
tar_filter_config("tar.tgz.command", internal_gzip_command, NULL);
475530
tar_filter_config("tar.tgz.remote", "true", NULL);
476-
tar_filter_config("tar.tar.gz.command", "gzip -cn", NULL);
531+
tar_filter_config("tar.tar.gz.command", internal_gzip_command, NULL);
477532
tar_filter_config("tar.tar.gz.remote", "true", NULL);
478533
git_config(git_tar_config, NULL);
479534
for (i = 0; i < nr_tar_filters; i++) {
480535
/* omit any filters that never had a command configured */
481-
if (tar_filters[i]->data)
536+
if (tar_filters[i]->filter_command)
482537
register_archiver(tar_filters[i]);
483538
}
484539
}

archive.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ struct archiver {
4343
const char *name;
4444
int (*write_archive)(const struct archiver *, struct archiver_args *);
4545
unsigned flags;
46-
void *data;
46+
char *filter_command;
4747
};
4848
void register_archiver(struct archiver *);
4949

t/t5000-tar-tree.sh

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -339,21 +339,21 @@ test_expect_success 'only enabled filters are available remotely' '
339339
test_cmp_bin remote.bar config.bar
340340
'
341341

342-
test_expect_success GZIP 'git archive --format=tgz' '
342+
test_expect_success 'git archive --format=tgz' '
343343
git archive --format=tgz HEAD >j.tgz
344344
'
345345

346-
test_expect_success GZIP 'git archive --format=tar.gz' '
346+
test_expect_success 'git archive --format=tar.gz' '
347347
git archive --format=tar.gz HEAD >j1.tar.gz &&
348348
test_cmp_bin j.tgz j1.tar.gz
349349
'
350350

351-
test_expect_success GZIP 'infer tgz from .tgz filename' '
351+
test_expect_success 'infer tgz from .tgz filename' '
352352
git archive --output=j2.tgz HEAD &&
353353
test_cmp_bin j.tgz j2.tgz
354354
'
355355

356-
test_expect_success GZIP 'infer tgz from .tar.gz filename' '
356+
test_expect_success 'infer tgz from .tar.gz filename' '
357357
git archive --output=j3.tar.gz HEAD &&
358358
test_cmp_bin j.tgz j3.tar.gz
359359
'
@@ -363,17 +363,33 @@ test_expect_success GZIP 'extract tgz file' '
363363
test_cmp_bin b.tar j.tar
364364
'
365365

366-
test_expect_success GZIP 'remote tar.gz is allowed by default' '
366+
test_expect_success 'remote tar.gz is allowed by default' '
367367
git archive --remote=. --format=tar.gz HEAD >remote.tar.gz &&
368368
test_cmp_bin j.tgz remote.tar.gz
369369
'
370370

371-
test_expect_success GZIP 'remote tar.gz can be disabled' '
371+
test_expect_success 'remote tar.gz can be disabled' '
372372
git config tar.tar.gz.remote false &&
373373
test_must_fail git archive --remote=. --format=tar.gz HEAD \
374374
>remote.tar.gz
375375
'
376376

377+
test_expect_success GZIP 'git archive --format=tgz (external gzip)' '
378+
test_config tar.tgz.command "gzip -cn" &&
379+
git archive --format=tgz HEAD >external_gzip.tgz
380+
'
381+
382+
test_expect_success GZIP 'git archive --format=tar.gz (external gzip)' '
383+
test_config tar.tar.gz.command "gzip -cn" &&
384+
git archive --format=tar.gz HEAD >external_gzip.tar.gz &&
385+
test_cmp_bin external_gzip.tgz external_gzip.tar.gz
386+
'
387+
388+
test_expect_success GZIP 'extract tgz file (external gzip)' '
389+
gzip -d -c <external_gzip.tgz >external_gzip.tar &&
390+
test_cmp_bin b.tar external_gzip.tar
391+
'
392+
377393
test_expect_success 'archive and :(glob)' '
378394
git archive -v HEAD -- ":(glob)**/sh" >/dev/null 2>actual &&
379395
cat >expect <<EOF &&

0 commit comments

Comments
 (0)