Skip to content

Commit dd8e912

Browse files
committed
streaming_write_entry(): use streaming API in write_entry()
When the output to a path does not have to be converted, we can read from the object database via the streaming API and write to the file in the working tree, without having to hold everything in memory. The ident, auto- and safe- crlf conversions inherently require you to read the whole thing before deciding what to do, so while it is technically possible to support them by using a buffer of an unbounded size or rewinding and reading the stream twice, it is less practical than the traditional "read the whole thing in core and convert" approach. Adding streaming filters for the other conversions on top of this should be doable by tweaking the can_bypass_conversion() function (it should be renamed to can_filter_stream() when it happens). Then the streaming API can be extended to wrap the git_istream streaming_write_entry() opens on the underlying object in another git_istream that reads from it, filters what is read, and lets the streaming_write_entry() read the filtered result. But that is outside the scope of this series. Signed-off-by: Junio C Hamano <[email protected]>
1 parent 46bf043 commit dd8e912

File tree

3 files changed

+76
-0
lines changed

3 files changed

+76
-0
lines changed

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,7 @@ extern int convert_to_git(const char *path, const char *src, size_t len,
11561156
struct strbuf *dst, enum safe_crlf checksafe);
11571157
extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
11581158
extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
1159+
extern int can_bypass_conversion(const char *path);
11591160

11601161
/* add */
11611162
/*

convert.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,3 +813,26 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
813813
}
814814
return ret | convert_to_git(path, src, len, dst, 0);
815815
}
816+
817+
/*
818+
* You would be crazy to set CRLF, smuge/clean or ident to
819+
* a large binary blob you would want us not to slurp into
820+
* the memory!
821+
*/
822+
int can_bypass_conversion(const char *path)
823+
{
824+
struct conv_attrs ca;
825+
enum crlf_action crlf_action;
826+
827+
convert_attrs(&ca, path);
828+
829+
if (ca.ident ||
830+
(ca.drv && (ca.drv->smudge || ca.drv->clean)))
831+
return 0;
832+
833+
crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
834+
if ((crlf_action == CRLF_BINARY) ||
835+
(crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
836+
return 1;
837+
return 0;
838+
}

entry.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "cache.h"
22
#include "blob.h"
33
#include "dir.h"
4+
#include "streaming.h"
45

56
static void create_directories(const char *path, int path_len,
67
const struct checkout *state)
@@ -114,6 +115,50 @@ static int fstat_output(int fd, const struct checkout *state, struct stat *st)
114115
return 0;
115116
}
116117

118+
static int streaming_write_entry(struct cache_entry *ce, char *path,
119+
const struct checkout *state, int to_tempfile,
120+
int *fstat_done, struct stat *statbuf)
121+
{
122+
struct git_istream *st;
123+
enum object_type type;
124+
unsigned long sz;
125+
int result = -1;
126+
int fd = -1;
127+
128+
st = open_istream(ce->sha1, &type, &sz);
129+
if (!st)
130+
return -1;
131+
if (type != OBJ_BLOB)
132+
goto close_and_exit;
133+
134+
fd = open_output_fd(path, ce, to_tempfile);
135+
if (fd < 0)
136+
goto close_and_exit;
137+
138+
for (;;) {
139+
char buf[10240];
140+
ssize_t wrote;
141+
ssize_t readlen = read_istream(st, buf, sizeof(buf));
142+
143+
if (!readlen)
144+
break;
145+
146+
wrote = write_in_full(fd, buf, readlen);
147+
148+
if (wrote != readlen)
149+
goto close_and_exit;
150+
}
151+
*fstat_done = fstat_output(fd, state, statbuf);
152+
153+
close_and_exit:
154+
close_istream(st);
155+
if (0 <= fd)
156+
result = close(fd);
157+
if (result && 0 <= fd)
158+
unlink(path);
159+
return result;
160+
}
161+
117162
static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
118163
{
119164
unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
@@ -124,6 +169,12 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
124169
size_t wrote, newsize = 0;
125170
struct stat st;
126171

172+
if ((ce_mode_s_ifmt == S_IFREG) &&
173+
can_bypass_conversion(path) &&
174+
!streaming_write_entry(ce, path, state, to_tempfile,
175+
&fstat_done, &st))
176+
goto finish;
177+
127178
switch (ce_mode_s_ifmt) {
128179
case S_IFREG:
129180
case S_IFLNK:
@@ -176,6 +227,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
176227
return error("unknown file mode for %s in index", path);
177228
}
178229

230+
finish:
179231
if (state->refresh_cache) {
180232
if (!fstat_done)
181233
lstat(ce->name, &st);

0 commit comments

Comments
 (0)