Skip to content

Commit 47de6b0

Browse files
committed
Merge branch 'nd/stream-more'
Use the streaming API to read blob data in smaller chunks in more places to reduce the memory footprint.

By Nguyễn Thái Ngọc Duy (6) and Junio C Hamano (1)

* nd/stream-more:
  update-server-info: respect core.bigfilethreshold
  fsck: use streaming API for writing lost-found blobs
  show: use streaming API for showing blobs
  parse_object: avoid putting whole blob in core
  cat-file: use streaming API to print blobs
  Add more large blob test cases
  streaming: make streaming-write-entry to be more reusable
2 parents 30fd3a5 + da591a7 commit 47de6b0

File tree

11 files changed

+221
-75
lines changed

11 files changed

+221
-75
lines changed

builtin/cat-file.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "parse-options.h"
1212
#include "diff.h"
1313
#include "userdiff.h"
14+
#include "streaming.h"
1415

1516
#define BATCH 1
1617
#define BATCH_CHECK 2
@@ -127,6 +128,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
127128
return cmd_ls_tree(2, ls_args, NULL);
128129
}
129130

131+
if (type == OBJ_BLOB)
132+
return stream_blob_to_fd(1, sha1, NULL, 0);
130133
buf = read_sha1_file(sha1, &type, &size);
131134
if (!buf)
132135
die("Cannot read object %s", obj_name);
@@ -149,6 +152,28 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
149152
break;
150153

151154
case 0:
155+
if (type_from_string(exp_type) == OBJ_BLOB) {
156+
unsigned char blob_sha1[20];
157+
if (sha1_object_info(sha1, NULL) == OBJ_TAG) {
158+
enum object_type type;
159+
unsigned long size;
160+
char *buffer = read_sha1_file(sha1, &type, &size);
161+
if (memcmp(buffer, "object ", 7) ||
162+
get_sha1_hex(buffer + 7, blob_sha1))
163+
die("%s not a valid tag", sha1_to_hex(sha1));
164+
free(buffer);
165+
} else
166+
hashcpy(blob_sha1, sha1);
167+
168+
if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
169+
return stream_blob_to_fd(1, blob_sha1, NULL, 0);
170+
/*
171+
* we attempted to dereference a tag to a blob
172+
* and failed; there may be new dereference
173+
* mechanisms this code is not aware of.
174+
* fall-back to the usual case.
175+
*/
176+
}
152177
buf = read_object_with_reference(sha1, exp_type, &size, NULL);
153178
break;
154179

builtin/fsck.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "parse-options.h"
1313
#include "dir.h"
1414
#include "progress.h"
15+
#include "streaming.h"
1516

1617
#define REACHABLE 0x0001
1718
#define SEEN 0x0002
@@ -238,13 +239,8 @@ static void check_unreachable_object(struct object *obj)
238239
if (!(f = fopen(filename, "w")))
239240
die_errno("Could not open '%s'", filename);
240241
if (obj->type == OBJ_BLOB) {
241-
enum object_type type;
242-
unsigned long size;
243-
char *buf = read_sha1_file(obj->sha1,
244-
&type, &size);
245-
if (buf && fwrite(buf, 1, size, f) != size)
242+
if (stream_blob_to_fd(fileno(f), obj->sha1, NULL, 1))
246243
die_errno("Could not write '%s'", filename);
247-
free(buf);
248244
} else
249245
fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
250246
if (fclose(f))

builtin/log.c

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "string-list.h"
2121
#include "parse-options.h"
2222
#include "branch.h"
23+
#include "streaming.h"
2324

2425
/* Set a default date-time format for git log ("log.date" config variable) */
2526
static const char *default_date_mode = NULL;
@@ -383,8 +384,13 @@ static void show_tagger(char *buf, int len, struct rev_info *rev)
383384
strbuf_release(&out);
384385
}
385386

386-
static int show_object(const unsigned char *sha1, int show_tag_object,
387-
struct rev_info *rev)
387+
/*
 * Write the blob named by "sha1" to file descriptor 1 through the
 * streaming interface.  "rev" is not used by this function.
 *
 * Returns the value returned by stream_blob_to_fd().
 */
static int show_blob_object(const unsigned char *sha1, struct rev_info *rev)
{
	int status;

	/*
	 * The blob is written to fd 1 directly, so flush whatever is
	 * still buffered in stdio first.
	 */
	fflush(stdout);

	status = stream_blob_to_fd(1, sha1, NULL, 0);
	return status;
}
392+
393+
static int show_tag_object(const unsigned char *sha1, struct rev_info *rev)
388394
{
389395
unsigned long size;
390396
enum object_type type;
@@ -394,16 +400,16 @@ static int show_object(const unsigned char *sha1, int show_tag_object,
394400
if (!buf)
395401
return error(_("Could not read object %s"), sha1_to_hex(sha1));
396402

397-
if (show_tag_object)
398-
while (offset < size && buf[offset] != '\n') {
399-
int new_offset = offset + 1;
400-
while (new_offset < size && buf[new_offset++] != '\n')
401-
; /* do nothing */
402-
if (!prefixcmp(buf + offset, "tagger "))
403-
show_tagger(buf + offset + 7,
404-
new_offset - offset - 7, rev);
405-
offset = new_offset;
406-
}
403+
assert(type == OBJ_TAG);
404+
while (offset < size && buf[offset] != '\n') {
405+
int new_offset = offset + 1;
406+
while (new_offset < size && buf[new_offset++] != '\n')
407+
; /* do nothing */
408+
if (!prefixcmp(buf + offset, "tagger "))
409+
show_tagger(buf + offset + 7,
410+
new_offset - offset - 7, rev);
411+
offset = new_offset;
412+
}
407413

408414
if (offset < size)
409415
fwrite(buf + offset, size - offset, 1, stdout);
@@ -463,7 +469,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
463469
const char *name = objects[i].name;
464470
switch (o->type) {
465471
case OBJ_BLOB:
466-
ret = show_object(o->sha1, 0, NULL);
472+
ret = show_blob_object(o->sha1, NULL);
467473
break;
468474
case OBJ_TAG: {
469475
struct tag *t = (struct tag *)o;
@@ -474,7 +480,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
474480
diff_get_color_opt(&rev.diffopt, DIFF_COMMIT),
475481
t->tag,
476482
diff_get_color_opt(&rev.diffopt, DIFF_RESET));
477-
ret = show_object(o->sha1, 1, &rev);
483+
ret = show_tag_object(o->sha1, &rev);
478484
rev.shown_one = 1;
479485
if (ret)
480486
break;

builtin/update-server-info.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix)
1515
OPT_END()
1616
};
1717

18+
git_config(git_default_config, NULL);
1819
argc = parse_options(argc, argv, prefix, options,
1920
update_server_info_usage, 0);
2021
if (argc > 0)

entry.c

Lines changed: 5 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -120,58 +120,15 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
120120
const struct checkout *state, int to_tempfile,
121121
int *fstat_done, struct stat *statbuf)
122122
{
123-
struct git_istream *st;
124-
enum object_type type;
125-
unsigned long sz;
126123
int result = -1;
127-
ssize_t kept = 0;
128-
int fd = -1;
129-
130-
st = open_istream(ce->sha1, &type, &sz, filter);
131-
if (!st)
132-
return -1;
133-
if (type != OBJ_BLOB)
134-
goto close_and_exit;
124+
int fd;
135125

136126
fd = open_output_fd(path, ce, to_tempfile);
137-
if (fd < 0)
138-
goto close_and_exit;
139-
140-
for (;;) {
141-
char buf[1024 * 16];
142-
ssize_t wrote, holeto;
143-
ssize_t readlen = read_istream(st, buf, sizeof(buf));
144-
145-
if (!readlen)
146-
break;
147-
if (sizeof(buf) == readlen) {
148-
for (holeto = 0; holeto < readlen; holeto++)
149-
if (buf[holeto])
150-
break;
151-
if (readlen == holeto) {
152-
kept += holeto;
153-
continue;
154-
}
155-
}
156-
157-
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
158-
goto close_and_exit;
159-
else
160-
kept = 0;
161-
wrote = write_in_full(fd, buf, readlen);
162-
163-
if (wrote != readlen)
164-
goto close_and_exit;
165-
}
166-
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
167-
write(fd, "", 1) != 1))
168-
goto close_and_exit;
169-
*fstat_done = fstat_output(fd, state, statbuf);
170-
171-
close_and_exit:
172-
close_istream(st);
173-
if (0 <= fd)
127+
if (0 <= fd) {
128+
result = stream_blob_to_fd(fd, ce->sha1, filter, 1);
129+
*fstat_done = fstat_output(fd, state, statbuf);
174130
result = close(fd);
131+
}
175132
if (result && 0 <= fd)
176133
unlink(path);
177134
return result;

object.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,17 @@ struct object *parse_object(const unsigned char *sha1)
198198
if (obj && obj->parsed)
199199
return obj;
200200

201+
if ((obj && obj->type == OBJ_BLOB) ||
202+
(!obj && has_sha1_file(sha1) &&
203+
sha1_object_info(sha1, NULL) == OBJ_BLOB)) {
204+
if (check_sha1_signature(repl, NULL, 0, NULL) < 0) {
205+
error("sha1 mismatch %s\n", sha1_to_hex(repl));
206+
return NULL;
207+
}
208+
parse_blob_buffer(lookup_blob(sha1), NULL, 0);
209+
return lookup_object(sha1);
210+
}
211+
201212
buffer = read_sha1_file(sha1, &type, &size);
202213
if (buffer) {
203214
if (check_sha1_signature(repl, buffer, size, typename(type)) < 0) {

sha1_file.c

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "pack-revindex.h"
2020
#include "sha1-lookup.h"
2121
#include "bulk-checkin.h"
22+
#include "streaming.h"
2223

2324
#ifndef O_NOATIME
2425
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1146,10 +1147,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
11461147
return NULL;
11471148
}
11481149

1149-
int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
1150+
/*
1151+
* With an in-core object data in "map", rehash it to make sure the
1152+
* object name actually matches "sha1" to detect object corruption.
1153+
* With "map" == NULL, try reading the object named with "sha1" using
1154+
* the streaming interface and rehash it to do the same.
1155+
*/
1156+
int check_sha1_signature(const unsigned char *sha1, void *map,
1157+
unsigned long size, const char *type)
11501158
{
11511159
unsigned char real_sha1[20];
1152-
hash_sha1_file(map, size, type, real_sha1);
1160+
enum object_type obj_type;
1161+
struct git_istream *st;
1162+
git_SHA_CTX c;
1163+
char hdr[32];
1164+
int hdrlen;
1165+
1166+
if (map) {
1167+
hash_sha1_file(map, size, type, real_sha1);
1168+
return hashcmp(sha1, real_sha1) ? -1 : 0;
1169+
}
1170+
1171+
st = open_istream(sha1, &obj_type, &size, NULL);
1172+
if (!st)
1173+
return -1;
1174+
1175+
/* Generate the header */
1176+
hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;
1177+
1178+
/* Sha1.. */
1179+
git_SHA1_Init(&c);
1180+
git_SHA1_Update(&c, hdr, hdrlen);
1181+
for (;;) {
1182+
char buf[1024 * 16];
1183+
ssize_t readlen = read_istream(st, buf, sizeof(buf));
1184+
1185+
if (!readlen)
1186+
break;
1187+
git_SHA1_Update(&c, buf, readlen);
1188+
}
1189+
git_SHA1_Final(real_sha1, &c);
1190+
close_istream(st);
11531191
return hashcmp(sha1, real_sha1) ? -1 : 0;
11541192
}
11551193

streaming.c

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,3 +489,58 @@ static open_method_decl(incore)
489489

490490
return st->u.incore.buf ? 0 : -1;
491491
}
492+
493+
494+
/****************************************************************
495+
* Users of streaming interface
496+
****************************************************************/
497+
498+
int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *filter,
499+
int can_seek)
500+
{
501+
struct git_istream *st;
502+
enum object_type type;
503+
unsigned long sz;
504+
ssize_t kept = 0;
505+
int result = -1;
506+
507+
st = open_istream(sha1, &type, &sz, filter);
508+
if (!st)
509+
return result;
510+
if (type != OBJ_BLOB)
511+
goto close_and_exit;
512+
for (;;) {
513+
char buf[1024 * 16];
514+
ssize_t wrote, holeto;
515+
ssize_t readlen = read_istream(st, buf, sizeof(buf));
516+
517+
if (!readlen)
518+
break;
519+
if (can_seek && sizeof(buf) == readlen) {
520+
for (holeto = 0; holeto < readlen; holeto++)
521+
if (buf[holeto])
522+
break;
523+
if (readlen == holeto) {
524+
kept += holeto;
525+
continue;
526+
}
527+
}
528+
529+
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
530+
goto close_and_exit;
531+
else
532+
kept = 0;
533+
wrote = write_in_full(fd, buf, readlen);
534+
535+
if (wrote != readlen)
536+
goto close_and_exit;
537+
}
538+
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
539+
write(fd, "", 1) != 1))
540+
goto close_and_exit;
541+
result = 0;
542+
543+
close_and_exit:
544+
close_istream(st);
545+
return result;
546+
}

streaming.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,6 @@ extern struct git_istream *open_istream(const unsigned char *, enum object_type
1212
extern int close_istream(struct git_istream *);
1313
extern ssize_t read_istream(struct git_istream *, char *, size_t);
1414

15+
extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek);
16+
1517
#endif /* STREAMING_H */

0 commit comments

Comments
 (0)