Skip to content

Commit bedd3b4

Browse files
committed
Merge branch 'nd/large-blobs'
Teach a few codepaths to punt (instead of dying) when large blobs that would not fit in core are involved in the operation. * nd/large-blobs: diff: shortcut for diff'ing two binary SHA-1 objects diff --stat: mark any file larger than core.bigfilethreshold binary diff.c: allow to pass more flags to diff_populate_filespec sha1_file.c: do not die failing to malloc in unpack_compressed_entry wrapper.c: introduce gentle xmallocz that does not die()
2 parents 08ad26a + 1aaf69e commit bedd3b4

File tree

9 files changed

+125
-37
lines changed

9 files changed

+125
-37
lines changed

Documentation/config.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,8 @@ core.bigFileThreshold::
499499
Files larger than this size are stored deflated, without
500500
attempting delta compression. Storing large files without
501501
delta compression avoids excessive memory usage, at the
502-
slight expense of increased disk usage.
502+
slight expense of increased disk usage. Additionally files
503+
larger than this size are always treated as binary.
503504
+
504505
Default is 512 MiB on all platforms. This should be reasonable
505506
for most projects as source code and other text files can still

Documentation/gitattributes.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,8 +440,8 @@ Unspecified::
440440

441441
A path to which the `diff` attribute is unspecified
442442
first gets its contents inspected, and if it looks like
443-
text, it is treated as text. Otherwise it would
444-
generate `Binary files differ`.
443+
text and is smaller than core.bigFileThreshold, it is treated
444+
as text. Otherwise it would generate `Binary files differ`.
445445

446446
String::
447447

diff.c

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ static unsigned long diff_filespec_size(struct diff_filespec *one)
376376
{
377377
if (!DIFF_FILE_VALID(one))
378378
return 0;
379-
diff_populate_filespec(one, 1);
379+
diff_populate_filespec(one, CHECK_SIZE_ONLY);
380380
return one->size;
381381
}
382382

@@ -1910,11 +1910,11 @@ static void show_dirstat(struct diff_options *options)
19101910
diff_free_filespec_data(p->one);
19111911
diff_free_filespec_data(p->two);
19121912
} else if (DIFF_FILE_VALID(p->one)) {
1913-
diff_populate_filespec(p->one, 1);
1913+
diff_populate_filespec(p->one, CHECK_SIZE_ONLY);
19141914
copied = added = 0;
19151915
diff_free_filespec_data(p->one);
19161916
} else if (DIFF_FILE_VALID(p->two)) {
1917-
diff_populate_filespec(p->two, 1);
1917+
diff_populate_filespec(p->two, CHECK_SIZE_ONLY);
19181918
copied = 0;
19191919
added = p->two->size;
19201920
diff_free_filespec_data(p->two);
@@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
21882188
one->is_binary = one->driver->binary;
21892189
else {
21902190
if (!one->data && DIFF_FILE_VALID(one))
2191-
diff_populate_filespec(one, 0);
2192-
if (one->data)
2191+
diff_populate_filespec(one, CHECK_BINARY);
2192+
if (one->is_binary == -1 && one->data)
21932193
one->is_binary = buffer_is_binary(one->data,
21942194
one->size);
21952195
if (one->is_binary == -1)
@@ -2324,6 +2324,19 @@ static void builtin_diff(const char *name_a,
23242324
} else if (!DIFF_OPT_TST(o, TEXT) &&
23252325
( (!textconv_one && diff_filespec_is_binary(one)) ||
23262326
(!textconv_two && diff_filespec_is_binary(two)) )) {
2327+
if (!one->data && !two->data &&
2328+
S_ISREG(one->mode) && S_ISREG(two->mode) &&
2329+
!DIFF_OPT_TST(o, BINARY)) {
2330+
if (!hashcmp(one->sha1, two->sha1)) {
2331+
if (must_show_header)
2332+
fprintf(o->file, "%s", header.buf);
2333+
goto free_ab_and_return;
2334+
}
2335+
fprintf(o->file, "%s", header.buf);
2336+
fprintf(o->file, "%sBinary files %s and %s differ\n",
2337+
line_prefix, lbl[0], lbl[1]);
2338+
goto free_ab_and_return;
2339+
}
23272340
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
23282341
die("unable to read files to diff");
23292342
/* Quite common confusing case */
@@ -2668,8 +2681,9 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
26682681
* grab the data for the blob (or file) for our own in-core comparison.
26692682
* diff_filespec has data and size fields for this purpose.
26702683
*/
2671-
int diff_populate_filespec(struct diff_filespec *s, int size_only)
2684+
int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
26722685
{
2686+
int size_only = flags & CHECK_SIZE_ONLY;
26732687
int err = 0;
26742688
/*
26752689
* demote FAIL to WARN to allow inspecting the situation
@@ -2724,6 +2738,11 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
27242738
}
27252739
if (size_only)
27262740
return 0;
2741+
if ((flags & CHECK_BINARY) &&
2742+
s->size > big_file_threshold && s->is_binary == -1) {
2743+
s->is_binary = 1;
2744+
return 0;
2745+
}
27272746
fd = open(s->path, O_RDONLY);
27282747
if (fd < 0)
27292748
goto err_empty;
@@ -2745,16 +2764,21 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
27452764
}
27462765
else {
27472766
enum object_type type;
2748-
if (size_only) {
2767+
if (size_only || (flags & CHECK_BINARY)) {
27492768
type = sha1_object_info(s->sha1, &s->size);
27502769
if (type < 0)
27512770
die("unable to read %s", sha1_to_hex(s->sha1));
2752-
} else {
2753-
s->data = read_sha1_file(s->sha1, &type, &s->size);
2754-
if (!s->data)
2755-
die("unable to read %s", sha1_to_hex(s->sha1));
2756-
s->should_free = 1;
2771+
if (size_only)
2772+
return 0;
2773+
if (s->size > big_file_threshold && s->is_binary == -1) {
2774+
s->is_binary = 1;
2775+
return 0;
2776+
}
27572777
}
2778+
s->data = read_sha1_file(s->sha1, &type, &s->size);
2779+
if (!s->data)
2780+
die("unable to read %s", sha1_to_hex(s->sha1));
2781+
s->should_free = 1;
27582782
}
27592783
return 0;
27602784
}
@@ -4688,8 +4712,8 @@ static int diff_filespec_check_stat_unmatch(struct diff_filepair *p)
46884712
!DIFF_FILE_VALID(p->two) ||
46894713
(p->one->sha1_valid && p->two->sha1_valid) ||
46904714
(p->one->mode != p->two->mode) ||
4691-
diff_populate_filespec(p->one, 1) ||
4692-
diff_populate_filespec(p->two, 1) ||
4715+
diff_populate_filespec(p->one, CHECK_SIZE_ONLY) ||
4716+
diff_populate_filespec(p->two, CHECK_SIZE_ONLY) ||
46934717
(p->one->size != p->two->size) ||
46944718
!diff_filespec_is_identical(p->one, p->two)) /* (2) */
46954719
p->skip_stat_unmatch_result = 1;

diffcore-rename.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,11 @@ static int estimate_similarity(struct diff_filespec *src,
147147
* is a possible size - we really should have a flag to
148148
* say whether the size is valid or not!)
149149
*/
150-
if (!src->cnt_data && diff_populate_filespec(src, 1))
150+
if (!src->cnt_data &&
151+
diff_populate_filespec(src, CHECK_SIZE_ONLY))
151152
return 0;
152-
if (!dst->cnt_data && diff_populate_filespec(dst, 1))
153+
if (!dst->cnt_data &&
154+
diff_populate_filespec(dst, CHECK_SIZE_ONLY))
153155
return 0;
154156

155157
max_size = ((src->size > dst->size) ? src->size : dst->size);

diffcore.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ extern void free_filespec(struct diff_filespec *);
5555
extern void fill_filespec(struct diff_filespec *, const unsigned char *,
5656
int, unsigned short);
5757

58-
extern int diff_populate_filespec(struct diff_filespec *, int);
58+
#define CHECK_SIZE_ONLY 1
59+
#define CHECK_BINARY 2
60+
extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
5961
extern void diff_free_filespec_data(struct diff_filespec *);
6062
extern void diff_free_filespec_blob(struct diff_filespec *);
6163
extern int diff_filespec_is_binary(struct diff_filespec *);

git-compat-util.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,7 @@ extern try_to_free_t set_try_to_free_routine(try_to_free_t);
609609
extern char *xstrdup(const char *str);
610610
extern void *xmalloc(size_t size);
611611
extern void *xmallocz(size_t size);
612+
extern void *xmallocz_gently(size_t size);
612613
extern void *xmemdupz(const void *data, size_t len);
613614
extern char *xstrndup(const char *str, size_t len);
614615
extern void *xrealloc(void *ptr, size_t size);

sha1_file.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1923,7 +1923,9 @@ static void *unpack_compressed_entry(struct packed_git *p,
19231923
git_zstream stream;
19241924
unsigned char *buffer, *in;
19251925

1926-
buffer = xmallocz(size);
1926+
buffer = xmallocz_gently(size);
1927+
if (!buffer)
1928+
return NULL;
19271929
memset(&stream, 0, sizeof(stream));
19281930
stream.next_out = buffer;
19291931
stream.avail_out = size + 1;

t/t1050-large.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,20 @@ test_expect_success 'diff --raw' '
112112
git diff --raw HEAD^
113113
'
114114

115+
test_expect_success 'diff --stat' '
116+
git diff --stat HEAD^ HEAD
117+
'
118+
119+
test_expect_success 'diff' '
120+
git diff HEAD^ HEAD >actual &&
121+
grep "Binary files.*differ" actual
122+
'
123+
124+
test_expect_success 'diff --cached' '
125+
git diff --cached HEAD^ >actual &&
126+
grep "Binary files.*differ" actual
127+
'
128+
115129
test_expect_success 'hash-object' '
116130
git hash-object large1
117131
'
@@ -163,4 +177,10 @@ test_expect_success 'zip achiving, deflate' '
163177
git archive --format=zip HEAD >/dev/null
164178
'
165179

180+
test_expect_success 'fsck' '
181+
test_must_fail git fsck 2>err &&
182+
n=$(grep "error: attempting to allocate .* over limit" err | wc -l) &&
183+
test "$n" -gt 1
184+
'
185+
166186
test_done

wrapper.c

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,23 @@ static void do_nothing(size_t size)
99

1010
static void (*try_to_free_routine)(size_t size) = do_nothing;
1111

12-
static void memory_limit_check(size_t size)
12+
static int memory_limit_check(size_t size, int gentle)
1313
{
1414
static int limit = -1;
1515
if (limit == -1) {
1616
const char *env = getenv("GIT_ALLOC_LIMIT");
1717
limit = env ? atoi(env) * 1024 : 0;
1818
}
19-
if (limit && size > limit)
20-
die("attempting to allocate %"PRIuMAX" over limit %d",
21-
(intmax_t)size, limit);
19+
if (limit && size > limit) {
20+
if (gentle) {
21+
error("attempting to allocate %"PRIuMAX" over limit %d",
22+
(intmax_t)size, limit);
23+
return -1;
24+
} else
25+
die("attempting to allocate %"PRIuMAX" over limit %d",
26+
(intmax_t)size, limit);
27+
}
28+
return 0;
2229
}
2330

2431
try_to_free_t set_try_to_free_routine(try_to_free_t routine)
@@ -42,11 +49,12 @@ char *xstrdup(const char *str)
4249
return ret;
4350
}
4451

45-
void *xmalloc(size_t size)
52+
static void *do_xmalloc(size_t size, int gentle)
4653
{
4754
void *ret;
4855

49-
memory_limit_check(size);
56+
if (memory_limit_check(size, gentle))
57+
return NULL;
5058
ret = malloc(size);
5159
if (!ret && !size)
5260
ret = malloc(1);
@@ -55,26 +63,54 @@ void *xmalloc(size_t size)
5563
ret = malloc(size);
5664
if (!ret && !size)
5765
ret = malloc(1);
58-
if (!ret)
59-
die("Out of memory, malloc failed (tried to allocate %lu bytes)",
60-
(unsigned long)size);
66+
if (!ret) {
67+
if (!gentle)
68+
die("Out of memory, malloc failed (tried to allocate %lu bytes)",
69+
(unsigned long)size);
70+
else {
71+
error("Out of memory, malloc failed (tried to allocate %lu bytes)",
72+
(unsigned long)size);
73+
return NULL;
74+
}
75+
}
6176
}
6277
#ifdef XMALLOC_POISON
6378
memset(ret, 0xA5, size);
6479
#endif
6580
return ret;
6681
}
6782

68-
void *xmallocz(size_t size)
83+
void *xmalloc(size_t size)
84+
{
85+
return do_xmalloc(size, 0);
86+
}
87+
88+
static void *do_xmallocz(size_t size, int gentle)
6989
{
7090
void *ret;
71-
if (unsigned_add_overflows(size, 1))
72-
die("Data too large to fit into virtual memory space.");
73-
ret = xmalloc(size + 1);
74-
((char*)ret)[size] = 0;
91+
if (unsigned_add_overflows(size, 1)) {
92+
if (gentle) {
93+
error("Data too large to fit into virtual memory space.");
94+
return NULL;
95+
} else
96+
die("Data too large to fit into virtual memory space.");
97+
}
98+
ret = do_xmalloc(size + 1, gentle);
99+
if (ret)
100+
((char*)ret)[size] = 0;
75101
return ret;
76102
}
77103

104+
void *xmallocz(size_t size)
105+
{
106+
return do_xmallocz(size, 0);
107+
}
108+
109+
void *xmallocz_gently(size_t size)
110+
{
111+
return do_xmallocz(size, 1);
112+
}
113+
78114
/*
79115
* xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
80116
* "data" to the allocated memory, zero terminates the allocated memory,
@@ -96,7 +132,7 @@ void *xrealloc(void *ptr, size_t size)
96132
{
97133
void *ret;
98134

99-
memory_limit_check(size);
135+
memory_limit_check(size, 0);
100136
ret = realloc(ptr, size);
101137
if (!ret && !size)
102138
ret = realloc(ptr, 1);
@@ -115,7 +151,7 @@ void *xcalloc(size_t nmemb, size_t size)
115151
{
116152
void *ret;
117153

118-
memory_limit_check(size * nmemb);
154+
memory_limit_check(size * nmemb, 0);
119155
ret = calloc(nmemb, size);
120156
if (!ret && (!nmemb || !size))
121157
ret = calloc(1, 1);

0 commit comments

Comments
 (0)