Skip to content

Commit b669109

Browse files
committed
Add streaming filter API
This introduces an API to plug custom filters into an input stream. The caller calls get_stream_filter("path") to obtain an appropriate filter for the path, and then passes it when opening an input stream via open_istream(). After that, the caller can read from the stream with read_istream(), and close it with close_istream(), just like an unfiltered stream. This only adds a "null" filter that is a pass-thru filter, but later changes can add LF-to-CRLF and other filters, and the callers of the streaming API do not have to change. Signed-off-by: Junio C Hamano <[email protected]>
1 parent d1bf0e0 commit b669109

File tree

5 files changed

+209
-16
lines changed

5 files changed

+209
-16
lines changed

convert.c

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -814,12 +814,69 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
814814
return ret | convert_to_git(path, src, len, dst, 0);
815815
}
816816

817+
/*****************************************************************
818+
*
819+
* Streaming conversion support
820+
*
821+
*****************************************************************/
822+
823+
struct stream_filter;

/*
 * One filtering step.  On entry *isize_p is the number of bytes
 * available at input and *osize_p is the room available at output;
 * the implementation lowers both by the amounts it consumed and
 * produced.  Returns 0 on success, non-zero on error.
 */
typedef int (*filter_fn)(struct stream_filter *filter,
			 const char *input, size_t *isize_p,
			 char *output, size_t *osize_p);

/* Releases whatever resources a filter instance holds. */
typedef void (*free_fn)(struct stream_filter *filter);

/* Per-filter-kind dispatch table. */
struct stream_filter_vtbl {
	filter_fn filter;
	free_fn free;
};

/* A filter instance; its behaviour is entirely defined by its vtbl. */
struct stream_filter {
	struct stream_filter_vtbl *vtbl;
};
836+
837+
/*
 * Pass-through filter: copy as many bytes as both the input and the
 * output buffers allow, then lower *isize_p and *osize_p by the number
 * of bytes copied.  The filter argument is not used.  Always returns 0.
 */
static int null_filter_fn(struct stream_filter *filter,
			  const char *input, size_t *isize_p,
			  char *output, size_t *osize_p)
{
	/* The transfer is bounded by the smaller of the two buffers. */
	size_t n = (*isize_p <= *osize_p) ? *isize_p : *osize_p;

	if (!n)
		return 0;

	memmove(output, input, n);
	*isize_p -= n;
	*osize_p -= n;
	return 0;
}
851+
852+
/*
 * Free callback for the null filter.  Null filter instances are
 * shared, so there is nothing to release.
 */
static void null_free_fn(struct stream_filter *filter)
{
	(void)filter; /* intentionally unused */
}
856+
857+
static struct stream_filter_vtbl null_vtbl = {
858+
null_filter_fn,
859+
null_free_fn,
860+
};
861+
862+
static struct stream_filter null_filter_singleton = {
863+
&null_vtbl,
864+
};
865+
866+
int is_null_stream_filter(struct stream_filter *filter)
867+
{
868+
return filter == &null_filter_singleton;
869+
}
870+
817871
/*
818-
* You would be crazy to set CRLF, smudge/clean or ident to
819-
* a large binary blob you would want us not to slurp into
820-
* the memory!
872+
* Return an appropriately constructed filter for the path, or NULL if
873+
* the contents cannot be filtered without reading the whole thing
874+
* in-core.
875+
*
876+
* Note that you would be crazy to set CRLF, smudge/clean or ident to a
877+
* large binary blob you would want us not to slurp into the memory!
821878
*/
822-
int can_bypass_conversion(const char *path)
879+
struct stream_filter *get_stream_filter(const char *path, const unsigned char *sha1)
823880
{
824881
struct conv_attrs ca;
825882
enum crlf_action crlf_action;
@@ -828,11 +885,24 @@ int can_bypass_conversion(const char *path)
828885

829886
if (ca.ident ||
830887
(ca.drv && (ca.drv->smudge || ca.drv->clean)))
831-
return 0;
888+
return NULL;
832889

833890
crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
834891
if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) ||
835892
(crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
836-
return 1;
837-
return 0;
893+
return &null_filter_singleton;
894+
895+
return NULL;
896+
}
897+
898+
void free_stream_filter(struct stream_filter *filter)
899+
{
900+
filter->vtbl->free(filter);
901+
}
902+
903+
int stream_filter(struct stream_filter *filter,
904+
const char *input, size_t *isize_p,
905+
char *output, size_t *osize_p)
906+
{
907+
return filter->vtbl->filter(filter, input, isize_p, output, osize_p);
838908
}

convert.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,26 @@ extern int convert_to_working_tree(const char *path, const char *src,
4040
size_t len, struct strbuf *dst);
4141
extern int renormalize_buffer(const char *path, const char *src, size_t len,
4242
struct strbuf *dst);
43-
extern int can_bypass_conversion(const char *path);
43+
44+
/*****************************************************************
45+
*
46+
* Streaming conversion support
47+
*
48+
*****************************************************************/
49+
50+
struct stream_filter; /* opaque */
51+
52+
extern struct stream_filter *get_stream_filter(const char *path, const unsigned char *);
53+
extern void free_stream_filter(struct stream_filter *);
54+
extern int is_null_stream_filter(struct stream_filter *);
55+
56+
/*
57+
* Consume up to *isize_p bytes of input and produce up to *osize_p
58+
* bytes of output; on return, *isize_p and *osize_p hold the amounts
59+
* left unconsumed and unfilled. Return 0 on success, non-zero on error.
60+
*/
61+
extern int stream_filter(struct stream_filter *,
62+
const char *input, size_t *isize_p,
63+
char *output, size_t *osize_p);
64+
4465
#endif /* CONVERT_H */

entry.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ static int fstat_output(int fd, const struct checkout *state, struct stat *st)
116116
}
117117

118118
static int streaming_write_entry(struct cache_entry *ce, char *path,
119+
struct stream_filter *filter,
119120
const struct checkout *state, int to_tempfile,
120121
int *fstat_done, struct stat *statbuf)
121122
{
@@ -126,7 +127,7 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
126127
ssize_t kept = 0;
127128
int fd = -1;
128129

129-
st = open_istream(ce->sha1, &type, &sz);
130+
st = open_istream(ce->sha1, &type, &sz, filter);
130131
if (!st)
131132
return -1;
132133
if (type != OBJ_BLOB)
@@ -186,11 +187,14 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
186187
size_t wrote, newsize = 0;
187188
struct stat st;
188189

189-
if ((ce_mode_s_ifmt == S_IFREG) &&
190-
can_bypass_conversion(path) &&
191-
!streaming_write_entry(ce, path, state, to_tempfile,
192-
&fstat_done, &st))
193-
goto finish;
190+
if (ce_mode_s_ifmt == S_IFREG) {
191+
struct stream_filter *filter = get_stream_filter(path, ce->sha1);
192+
if (filter &&
193+
!streaming_write_entry(ce, path, filter,
194+
state, to_tempfile,
195+
&fstat_done, &st))
196+
goto finish;
197+
}
194198

195199
switch (ce_mode_s_ifmt) {
196200
case S_IFREG:

streaming.c

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,27 @@ struct stream_vtbl {
4141
static open_method_decl(incore);
4242
static open_method_decl(loose);
4343
static open_method_decl(pack_non_delta);
44+
static struct git_istream *attach_stream_filter(struct git_istream *st,
45+
struct stream_filter *filter);
46+
4447

4548
static open_istream_fn open_istream_tbl[] = {
4649
open_istream_incore,
4750
open_istream_loose,
4851
open_istream_pack_non_delta,
4952
};
5053

54+
/* Size in bytes of each staging buffer used by a filtered stream. */
#define FILTER_BUFFER (1024*16)

/* State of a stream whose contents are passed through a filter. */
struct filtered_istream {
	struct git_istream *upstream;	/* stream the raw data is read from */
	struct stream_filter *filter;	/* filter applied to that data */
	char ibuf[FILTER_BUFFER];	/* data read from upstream, not yet filtered */
	char obuf[FILTER_BUFFER];	/* filtered data, not yet handed to the reader */
	int i_end;			/* end of the valid data in ibuf */
	int i_ptr;			/* next byte of ibuf to feed to the filter */
	int o_end;			/* end of the valid data in obuf */
	int o_ptr;			/* next byte of obuf to hand to the reader */
};
64+
5165
struct git_istream {
5266
const struct stream_vtbl *vtbl;
5367
unsigned long size; /* inflated size of full object */
@@ -72,6 +86,8 @@ struct git_istream {
7286
struct packed_git *pack;
7387
off_t pos;
7488
} in_pack;
89+
90+
struct filtered_istream filtered;
7591
} u;
7692
};
7793

@@ -112,7 +128,8 @@ static enum input_source istream_source(const unsigned char *sha1,
112128

113129
struct git_istream *open_istream(const unsigned char *sha1,
114130
enum object_type *type,
115-
unsigned long *size)
131+
unsigned long *size,
132+
struct stream_filter *filter)
116133
{
117134
struct git_istream *st;
118135
struct object_info oi;
@@ -129,6 +146,14 @@ struct git_istream *open_istream(const unsigned char *sha1,
129146
return NULL;
130147
}
131148
}
149+
if (st && filter) {
150+
/* Add "&& !is_null_stream_filter(filter)" for performance */
151+
struct git_istream *nst = attach_stream_filter(st, filter);
152+
if (!nst)
153+
close_istream(st);
154+
st = nst;
155+
}
156+
132157
*size = st->size;
133158
return st;
134159
}
@@ -147,6 +172,79 @@ static void close_deflated_stream(struct git_istream *st)
147172
}
148173

149174

175+
/*****************************************************************
176+
*
177+
* Filtered stream
178+
*
179+
*****************************************************************/
180+
181+
static close_method_decl(filtered)
182+
{
183+
free_stream_filter(st->u.filtered.filter);
184+
return close_istream(st->u.filtered.upstream);
185+
}
186+
187+
static read_method_decl(filtered)
188+
{
189+
struct filtered_istream *fs = &(st->u.filtered);
190+
size_t filled = 0;
191+
192+
while (sz) {
193+
/* do we already have filtered output? */
194+
if (fs->o_ptr < fs->o_end) {
195+
size_t to_move = fs->o_end - fs->o_ptr;
196+
if (sz < to_move)
197+
to_move = sz;
198+
memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
199+
fs->o_ptr += to_move;
200+
sz -= to_move;
201+
filled += to_move;
202+
continue;
203+
}
204+
fs->o_end = fs->o_ptr = 0;
205+
206+
/* do we have anything to feed the filter with? */
207+
if (fs->i_ptr < fs->i_end) {
208+
size_t to_feed = fs->i_end - fs->i_ptr;
209+
size_t to_receive = FILTER_BUFFER;
210+
if (stream_filter(fs->filter,
211+
fs->ibuf + fs->i_ptr, &to_feed,
212+
fs->obuf, &to_receive))
213+
return -1;
214+
fs->i_ptr = fs->i_end - to_feed;
215+
fs->o_end = FILTER_BUFFER - to_receive;
216+
continue;
217+
}
218+
fs->i_end = fs->i_ptr = 0;
219+
220+
/* refill the input from the upstream */
221+
fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
222+
if (fs->i_end <= 0)
223+
break;
224+
}
225+
return filled;
226+
}
227+
228+
/* Method table for filtered streams: close method, then read method. */
static struct stream_vtbl filtered_vtbl = {
	close_istream_filtered,
	read_istream_filtered,
};
232+
233+
static struct git_istream *attach_stream_filter(struct git_istream *st,
234+
struct stream_filter *filter)
235+
{
236+
struct git_istream *ifs = xmalloc(sizeof(*ifs));
237+
struct filtered_istream *fs = &(ifs->u.filtered);
238+
239+
ifs->vtbl = &filtered_vtbl;
240+
fs->upstream = st;
241+
fs->filter = filter;
242+
fs->i_end = fs->i_ptr = 0;
243+
fs->o_end = fs->o_ptr = 0;
244+
ifs->size = -1; /* unknown */
245+
return ifs;
246+
}
247+
150248
/*****************************************************************
151249
*
152250
* Loose object stream

streaming.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
/* opaque */
99
struct git_istream;
1010

11-
extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *);
11+
extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *);
1212
extern int close_istream(struct git_istream *);
1313
extern ssize_t read_istream(struct git_istream *, char *, size_t);
1414

0 commit comments

Comments
 (0)