Skip to content

Commit 7a23f73

Browse files
committed
Merge branch 'jk/big-and-future-archive-tar'
"git archive" learned to handle files that are larger than 8GB and commits far in the future than expressible by the traditional US-TAR format. * jk/big-and-future-archive-tar: archive-tar: drop return value archive-tar: write extended headers for far-future mtime archive-tar: write extended headers for file sizes >= 8GB t5000: test tar files that overflow ustar headers t9300: factor out portable "head -c" replacement
2 parents 42bd668 + 5caeeb8 commit 7a23f73

File tree

6 files changed

+138
-30
lines changed

6 files changed

+138
-30
lines changed

archive-tar.c

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@ static int tar_umask = 002;
1818
static int write_tar_filter_archive(const struct archiver *ar,
1919
struct archiver_args *args);
2020

21+
/*
22+
* This is the max value that a ustar size header can specify, as it is fixed
23+
* at 11 octal digits. POSIX specifies that we switch to extended headers at
24+
* this size.
25+
*
26+
* Likewise for the mtime (which happens to use a buffer of the same size).
27+
*/
28+
#define USTAR_MAX_SIZE 077777777777UL
29+
#define USTAR_MAX_MTIME 077777777777UL
30+
2131
/* writes out the whole block, but only if it is full */
2232
static void write_if_needed(void)
2333
{
@@ -137,6 +147,20 @@ static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword,
137147
strbuf_addch(sb, '\n');
138148
}
139149

150+
/*
151+
* Like strbuf_append_ext_header, but for numeric values.
152+
*/
153+
static void strbuf_append_ext_header_uint(struct strbuf *sb,
154+
const char *keyword,
155+
uintmax_t value)
156+
{
157+
char buf[40]; /* big enough for 2^128 in decimal, plus NUL */
158+
int len;
159+
160+
len = xsnprintf(buf, sizeof(buf), "%"PRIuMAX, value);
161+
strbuf_append_ext_header(sb, keyword, buf, len);
162+
}
163+
140164
static unsigned int ustar_header_chksum(const struct ustar_header *header)
141165
{
142166
const unsigned char *p = (const unsigned char *)header;
@@ -208,7 +232,7 @@ static int write_tar_entry(struct archiver_args *args,
208232
struct ustar_header header;
209233
struct strbuf ext_header = STRBUF_INIT;
210234
unsigned int old_mode = mode;
211-
unsigned long size;
235+
unsigned long size, size_in_header;
212236
void *buffer;
213237
int err = 0;
214238

@@ -267,7 +291,13 @@ static int write_tar_entry(struct archiver_args *args,
267291
memcpy(header.linkname, buffer, size);
268292
}
269293

270-
prepare_header(args, &header, mode, size);
294+
size_in_header = size;
295+
if (S_ISREG(mode) && size > USTAR_MAX_SIZE) {
296+
size_in_header = 0;
297+
strbuf_append_ext_header_uint(&ext_header, "size", size);
298+
}
299+
300+
prepare_header(args, &header, mode, size_in_header);
271301

272302
if (ext_header.len > 0) {
273303
err = write_extended_header(args, sha1, ext_header.buf,
@@ -289,15 +319,25 @@ static int write_tar_entry(struct archiver_args *args,
289319
return err;
290320
}
291321

292-
static int write_global_extended_header(struct archiver_args *args)
322+
static void write_global_extended_header(struct archiver_args *args)
293323
{
294324
const unsigned char *sha1 = args->commit_sha1;
295325
struct strbuf ext_header = STRBUF_INIT;
296326
struct ustar_header header;
297327
unsigned int mode;
298-
int err = 0;
299328

300-
strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40);
329+
if (sha1)
330+
strbuf_append_ext_header(&ext_header, "comment",
331+
sha1_to_hex(sha1), 40);
332+
if (args->time > USTAR_MAX_MTIME) {
333+
strbuf_append_ext_header_uint(&ext_header, "mtime",
334+
args->time);
335+
args->time = USTAR_MAX_MTIME;
336+
}
337+
338+
if (!ext_header.len)
339+
return;
340+
301341
memset(&header, 0, sizeof(header));
302342
*header.typeflag = TYPEFLAG_GLOBAL_HEADER;
303343
mode = 0100666;
@@ -306,7 +346,6 @@ static int write_global_extended_header(struct archiver_args *args)
306346
write_blocked(&header, sizeof(header));
307347
write_blocked(ext_header.buf, ext_header.len);
308348
strbuf_release(&ext_header);
309-
return err;
310349
}
311350

312351
static struct archiver **tar_filters;
@@ -382,10 +421,8 @@ static int write_tar_archive(const struct archiver *ar,
382421
{
383422
int err = 0;
384423

385-
if (args->commit_sha1)
386-
err = write_global_extended_header(args);
387-
if (!err)
388-
err = write_archive_entries(args, write_tar_entry);
424+
write_global_extended_header(args);
425+
err = write_archive_entries(args, write_tar_entry);
389426
if (!err)
390427
write_trailer();
391428
return err;

t/t5000-tar-tree.sh

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,4 +319,78 @@ test_expect_success 'catch non-matching pathspec' '
319319
test_must_fail git archive -v HEAD -- "*.abc" >/dev/null
320320
'
321321

322+
# Pull the size and date of each entry in a tarfile using the system tar.
323+
#
324+
# We'll pull out only the year from the date; that avoids any question of
325+
# timezones impacting the result (as long as we keep our test times away from a
326+
# year boundary; our reference times are all in August).
327+
#
328+
# The output of tar_info is expected to be "<size> <year>", both in decimal. It
329+
# ignores the return value of tar. We have to do this, because some of our test
330+
# input is only partial (the real data is 64GB in some cases).
331+
tar_info () {
332+
"$TAR" tvf "$1" |
333+
awk '{
334+
split($4, date, "-")
335+
print $3 " " date[1]
336+
}'
337+
}
338+
339+
# See if our system tar can handle a tar file with huge sizes and dates far in
340+
# the future, and that we can actually parse its output.
341+
#
342+
# The reference file was generated by GNU tar, and the magic time and size are
343+
# both octal 01000000000001, which overflows normal ustar fields.
344+
test_lazy_prereq TAR_HUGE '
345+
echo "68719476737 4147" >expect &&
346+
tar_info "$TEST_DIRECTORY"/t5000/huge-and-future.tar >actual &&
347+
test_cmp expect actual
348+
'
349+
350+
test_expect_success 'set up repository with huge blob' '
351+
obj_d=19 &&
352+
obj_f=f9c8273ec45a8938e6999cb59b3ff66739902a &&
353+
obj=${obj_d}${obj_f} &&
354+
mkdir -p .git/objects/$obj_d &&
355+
cp "$TEST_DIRECTORY"/t5000/$obj .git/objects/$obj_d/$obj_f &&
356+
rm -f .git/index &&
357+
git update-index --add --cacheinfo 100644,$obj,huge &&
358+
git commit -m huge
359+
'
360+
361+
# We expect git to die with SIGPIPE here (otherwise we
362+
# would generate the whole 64GB).
363+
test_expect_success 'generate tar with huge size' '
364+
{
365+
git archive HEAD
366+
echo $? >exit-code
367+
} | test_copy_bytes 4096 >huge.tar &&
368+
echo 141 >expect &&
369+
test_cmp expect exit-code
370+
'
371+
372+
test_expect_success TAR_HUGE 'system tar can read our huge size' '
373+
echo 68719476737 >expect &&
374+
tar_info huge.tar | cut -d" " -f1 >actual &&
375+
test_cmp expect actual
376+
'
377+
378+
test_expect_success 'set up repository with far-future commit' '
379+
rm -f .git/index &&
380+
echo content >file &&
381+
git add file &&
382+
GIT_COMMITTER_DATE="@68719476737 +0000" \
383+
git commit -m "tempori parendum"
384+
'
385+
386+
test_expect_success 'generate tar with future mtime' '
387+
git archive HEAD >future.tar
388+
'
389+
390+
test_expect_success TAR_HUGE 'system tar can read our future mtime' '
391+
echo 4147 >expect &&
392+
tar_info future.tar | cut -d" " -f2 >actual &&
393+
test_cmp expect actual
394+
'
395+
322396
test_done
t/t5000/19f9c8273ec45a8938e6999cb59b3ff66739902a

Binary file not shown.

t/t5000/huge-and-future.tar

2 KB
Binary file not shown.

t/t9300-fast-import.sh

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,6 @@ test_description='test git fast-import utility'
77
. ./test-lib.sh
88
. "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash
99

10-
# Print $1 bytes from stdin to stdout.
11-
#
12-
# This could be written as "head -c $1", but IRIX "head" does not
13-
# support the -c option.
14-
head_c () {
15-
perl -e '
16-
my $len = $ARGV[1];
17-
while ($len > 0) {
18-
my $s;
19-
my $nread = sysread(STDIN, $s, $len);
20-
die "cannot read: $!" unless defined($nread);
21-
print $s;
22-
$len -= $nread;
23-
}
24-
' - "$1"
25-
}
26-
2710
verify_packs () {
2811
for p in .git/objects/pack/*.pack
2912
do
@@ -2481,7 +2464,7 @@ test_expect_success PIPE 'R: copy using cat-file' '
24812464
24822465
read blob_id type size <&3 &&
24832466
echo "$blob_id $type $size" >response &&
2484-
head_c $size >blob <&3 &&
2467+
test_copy_bytes $size >blob <&3 &&
24852468
read newline <&3 &&
24862469
24872470
cat <<-EOF &&
@@ -2524,7 +2507,7 @@ test_expect_success PIPE 'R: print blob mid-commit' '
25242507
EOF
25252508
25262509
read blob_id type size <&3 &&
2527-
head_c $size >actual <&3 &&
2510+
test_copy_bytes $size >actual <&3 &&
25282511
read newline <&3 &&
25292512
25302513
echo
@@ -2559,7 +2542,7 @@ test_expect_success PIPE 'R: print staged blob within commit' '
25592542
echo "cat-blob $to_get" &&
25602543
25612544
read blob_id type size <&3 &&
2562-
head_c $size >actual <&3 &&
2545+
test_copy_bytes $size >actual <&3 &&
25632546
read newline <&3 &&
25642547
25652548
echo deleteall

t/test-lib-functions.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,3 +961,17 @@ test_env () {
961961
done
962962
)
963963
}
964+
965+
# Read up to "$1" bytes (or to EOF) from stdin and write them to stdout.
966+
test_copy_bytes () {
967+
perl -e '
968+
my $len = $ARGV[1];
969+
while ($len > 0) {
970+
my $s;
971+
my $nread = sysread(STDIN, $s, $len);
972+
die "cannot read: $!" unless defined($nread);
973+
print $s;
974+
$len -= $nread;
975+
}
976+
' - "$1"
977+
}

0 commit comments

Comments (0)