Skip to content

Commit e132702

Browse files
peffgitster
authored andcommitted
grep: refactor the concept of "grep source" into an object
The main interface to the low-level grep code is grep_buffer, which takes a pointer to a buffer and a size. This is convenient and flexible (we use it to grep commit bodies, files on disk, and blobs by sha1), but it makes it hard to pass extra information about what we are grepping (either for correctness, like overriding binary auto-detection, or for optimizations, like lazily loading blob contents). Instead, let's encapsulate the idea of a "grep source", including the buffer, its size, and where the data is coming from. This is similar to the diff_filespec structure used by the diff code (unsurprising, since future patches will implement some of the same optimizations found there). The diffstat is slightly scarier than the actual patch content. Most of the modified lines are simply replacing access to raw variables with their counterparts that are now in a "struct grep_source". Most of the added lines were taken from builtin/grep.c, which partially abstracted the idea of grep sources (for file vs sha1 sources). Instead of dropping the now-redundant code, this patch leaves builtin/grep.c using the traditional grep_buffer interface (which now wraps the grep_source interface). That makes it easy to test that there is no change of behavior (yet). Signed-off-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent b3aeb28 commit e132702

File tree

2 files changed

+186
-34
lines changed

2 files changed

+186
-34
lines changed

grep.c

Lines changed: 164 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -837,13 +837,13 @@ pthread_mutex_t grep_read_mutex;
837837
#define grep_attr_unlock()
838838
#endif
839839

840-
static int match_funcname(struct grep_opt *opt, const char *name, char *bol, char *eol)
840+
static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
841841
{
842842
xdemitconf_t *xecfg = opt->priv;
843843
if (xecfg && !xecfg->find_func) {
844844
struct userdiff_driver *drv;
845845
grep_attr_lock();
846-
drv = userdiff_find_by_path(name);
846+
drv = userdiff_find_by_path(gs->name);
847847
grep_attr_unlock();
848848
if (drv && drv->funcname.pattern) {
849849
const struct userdiff_funcname *pe = &drv->funcname;
@@ -866,33 +866,33 @@ static int match_funcname(struct grep_opt *opt, const char *name, char *bol, cha
866866
return 0;
867867
}
868868

869-
static void show_funcname_line(struct grep_opt *opt, const char *name,
870-
char *buf, char *bol, unsigned lno)
869+
static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
870+
char *bol, unsigned lno)
871871
{
872-
while (bol > buf) {
872+
while (bol > gs->buf) {
873873
char *eol = --bol;
874874

875-
while (bol > buf && bol[-1] != '\n')
875+
while (bol > gs->buf && bol[-1] != '\n')
876876
bol--;
877877
lno--;
878878

879879
if (lno <= opt->last_shown)
880880
break;
881881

882-
if (match_funcname(opt, name, bol, eol)) {
883-
show_line(opt, bol, eol, name, lno, '=');
882+
if (match_funcname(opt, gs, bol, eol)) {
883+
show_line(opt, bol, eol, gs->name, lno, '=');
884884
break;
885885
}
886886
}
887887
}
888888

889-
static void show_pre_context(struct grep_opt *opt, const char *name, char *buf,
889+
static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
890890
char *bol, char *end, unsigned lno)
891891
{
892892
unsigned cur = lno, from = 1, funcname_lno = 0;
893893
int funcname_needed = !!opt->funcname;
894894

895-
if (opt->funcbody && !match_funcname(opt, name, bol, end))
895+
if (opt->funcbody && !match_funcname(opt, gs, bol, end))
896896
funcname_needed = 2;
897897

898898
if (opt->pre_context < lno)
@@ -901,30 +901,30 @@ static void show_pre_context(struct grep_opt *opt, const char *name, char *buf,
901901
from = opt->last_shown + 1;
902902

903903
/* Rewind. */
904-
while (bol > buf &&
904+
while (bol > gs->buf &&
905905
cur > (funcname_needed == 2 ? opt->last_shown + 1 : from)) {
906906
char *eol = --bol;
907907

908-
while (bol > buf && bol[-1] != '\n')
908+
while (bol > gs->buf && bol[-1] != '\n')
909909
bol--;
910910
cur--;
911-
if (funcname_needed && match_funcname(opt, name, bol, eol)) {
911+
if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
912912
funcname_lno = cur;
913913
funcname_needed = 0;
914914
}
915915
}
916916

917917
/* We need to look even further back to find a function signature. */
918918
if (opt->funcname && funcname_needed)
919-
show_funcname_line(opt, name, buf, bol, cur);
919+
show_funcname_line(opt, gs, bol, cur);
920920

921921
/* Back forward. */
922922
while (cur < lno) {
923923
char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
924924

925925
while (*eol != '\n')
926926
eol++;
927-
show_line(opt, bol, eol, name, cur, sign);
927+
show_line(opt, bol, eol, gs->name, cur, sign);
928928
bol = eol + 1;
929929
cur++;
930930
}
@@ -991,11 +991,10 @@ static void std_output(struct grep_opt *opt, const void *buf, size_t size)
991991
fwrite(buf, size, 1, stdout);
992992
}
993993

994-
static int grep_buffer_1(struct grep_opt *opt, const char *name,
995-
char *buf, unsigned long size, int collect_hits)
994+
static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
996995
{
997-
char *bol = buf;
998-
unsigned long left = size;
996+
char *bol;
997+
unsigned long left;
999998
unsigned lno = 1;
1000999
unsigned last_hit = 0;
10011000
int binary_match_only = 0;
@@ -1023,13 +1022,16 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
10231022
}
10241023
opt->last_shown = 0;
10251024

1025+
if (grep_source_load(gs) < 0)
1026+
return 0;
1027+
10261028
switch (opt->binary) {
10271029
case GREP_BINARY_DEFAULT:
1028-
if (buffer_is_binary(buf, size))
1030+
if (buffer_is_binary(gs->buf, gs->size))
10291031
binary_match_only = 1;
10301032
break;
10311033
case GREP_BINARY_NOMATCH:
1032-
if (buffer_is_binary(buf, size))
1034+
if (buffer_is_binary(gs->buf, gs->size))
10331035
return 0; /* Assume unmatch */
10341036
break;
10351037
case GREP_BINARY_TEXT:
@@ -1043,6 +1045,8 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
10431045

10441046
try_lookahead = should_lookahead(opt);
10451047

1048+
bol = gs->buf;
1049+
left = gs->size;
10461050
while (left) {
10471051
char *eol, ch;
10481052
int hit;
@@ -1091,14 +1095,14 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
10911095
if (opt->status_only)
10921096
return 1;
10931097
if (opt->name_only) {
1094-
show_name(opt, name);
1098+
show_name(opt, gs->name);
10951099
return 1;
10961100
}
10971101
if (opt->count)
10981102
goto next_line;
10991103
if (binary_match_only) {
11001104
opt->output(opt, "Binary file ", 12);
1101-
output_color(opt, name, strlen(name),
1105+
output_color(opt, gs->name, strlen(gs->name),
11021106
opt->color_filename);
11031107
opt->output(opt, " matches\n", 9);
11041108
return 1;
@@ -1107,23 +1111,23 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
11071111
* pre-context lines, we would need to show them.
11081112
*/
11091113
if (opt->pre_context || opt->funcbody)
1110-
show_pre_context(opt, name, buf, bol, eol, lno);
1114+
show_pre_context(opt, gs, bol, eol, lno);
11111115
else if (opt->funcname)
1112-
show_funcname_line(opt, name, buf, bol, lno);
1113-
show_line(opt, bol, eol, name, lno, ':');
1116+
show_funcname_line(opt, gs, bol, lno);
1117+
show_line(opt, bol, eol, gs->name, lno, ':');
11141118
last_hit = lno;
11151119
if (opt->funcbody)
11161120
show_function = 1;
11171121
goto next_line;
11181122
}
1119-
if (show_function && match_funcname(opt, name, bol, eol))
1123+
if (show_function && match_funcname(opt, gs, bol, eol))
11201124
show_function = 0;
11211125
if (show_function ||
11221126
(last_hit && lno <= last_hit + opt->post_context)) {
11231127
/* If the last hit is within the post context,
11241128
* we need to show this line.
11251129
*/
1126-
show_line(opt, bol, eol, name, lno, '-');
1130+
show_line(opt, bol, eol, gs->name, lno, '-');
11271131
}
11281132

11291133
next_line:
@@ -1141,7 +1145,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
11411145
return 0;
11421146
if (opt->unmatch_name_only) {
11431147
/* We did not see any hit, so we want to show this */
1144-
show_name(opt, name);
1148+
show_name(opt, gs->name);
11451149
return 1;
11461150
}
11471151

@@ -1155,7 +1159,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
11551159
*/
11561160
if (opt->count && count) {
11571161
char buf[32];
1158-
output_color(opt, name, strlen(name), opt->color_filename);
1162+
output_color(opt, gs->name, strlen(gs->name), opt->color_filename);
11591163
output_sep(opt, ':');
11601164
snprintf(buf, sizeof(buf), "%u\n", count);
11611165
opt->output(opt, buf, strlen(buf));
@@ -1190,23 +1194,149 @@ static int chk_hit_marker(struct grep_expr *x)
11901194
}
11911195
}
11921196

1193-
int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
1197+
int grep_source(struct grep_opt *opt, struct grep_source *gs)
11941198
{
11951199
/*
11961200
* we do not have to do the two-pass grep when we do not check
11971201
* buffer-wide "all-match".
11981202
*/
11991203
if (!opt->all_match)
1200-
return grep_buffer_1(opt, name, buf, size, 0);
1204+
return grep_source_1(opt, gs, 0);
12011205

12021206
/* Otherwise the toplevel "or" terms hit a bit differently.
12031207
* We first clear hit markers from them.
12041208
*/
12051209
clr_hit_marker(opt->pattern_expression);
1206-
grep_buffer_1(opt, name, buf, size, 1);
1210+
grep_source_1(opt, gs, 1);
12071211

12081212
if (!chk_hit_marker(opt->pattern_expression))
12091213
return 0;
12101214

1211-
return grep_buffer_1(opt, name, buf, size, 0);
1215+
return grep_source_1(opt, gs, 0);
1216+
}
1217+
1218+
int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
1219+
{
1220+
struct grep_source gs;
1221+
int r;
1222+
1223+
grep_source_init(&gs, GREP_SOURCE_BUF, name, NULL);
1224+
gs.buf = buf;
1225+
gs.size = size;
1226+
1227+
r = grep_source(opt, &gs);
1228+
1229+
grep_source_clear(&gs);
1230+
return r;
1231+
}
1232+
1233+
void grep_source_init(struct grep_source *gs, enum grep_source_type type,
1234+
const char *name, const void *identifier)
1235+
{
1236+
gs->type = type;
1237+
gs->name = name ? xstrdup(name) : NULL;
1238+
gs->buf = NULL;
1239+
gs->size = 0;
1240+
1241+
switch (type) {
1242+
case GREP_SOURCE_FILE:
1243+
gs->identifier = xstrdup(identifier);
1244+
break;
1245+
case GREP_SOURCE_SHA1:
1246+
gs->identifier = xmalloc(20);
1247+
memcpy(gs->identifier, identifier, 20);
1248+
break;
1249+
case GREP_SOURCE_BUF:
1250+
gs->identifier = NULL;
1251+
}
1252+
}
1253+
1254+
void grep_source_clear(struct grep_source *gs)
1255+
{
1256+
free(gs->name);
1257+
gs->name = NULL;
1258+
free(gs->identifier);
1259+
gs->identifier = NULL;
1260+
grep_source_clear_data(gs);
1261+
}
1262+
1263+
void grep_source_clear_data(struct grep_source *gs)
1264+
{
1265+
switch (gs->type) {
1266+
case GREP_SOURCE_FILE:
1267+
case GREP_SOURCE_SHA1:
1268+
free(gs->buf);
1269+
gs->buf = NULL;
1270+
gs->size = 0;
1271+
break;
1272+
case GREP_SOURCE_BUF:
1273+
/* leave user-provided buf intact */
1274+
break;
1275+
}
1276+
}
1277+
1278+
static int grep_source_load_sha1(struct grep_source *gs)
1279+
{
1280+
enum object_type type;
1281+
1282+
grep_read_lock();
1283+
gs->buf = read_sha1_file(gs->identifier, &type, &gs->size);
1284+
grep_read_unlock();
1285+
1286+
if (!gs->buf)
1287+
return error(_("'%s': unable to read %s"),
1288+
gs->name,
1289+
sha1_to_hex(gs->identifier));
1290+
return 0;
1291+
}
1292+
1293+
static int grep_source_load_file(struct grep_source *gs)
1294+
{
1295+
const char *filename = gs->identifier;
1296+
struct stat st;
1297+
char *data;
1298+
size_t size;
1299+
int i;
1300+
1301+
if (lstat(filename, &st) < 0) {
1302+
err_ret:
1303+
if (errno != ENOENT)
1304+
error(_("'%s': %s"), filename, strerror(errno));
1305+
return -1;
1306+
}
1307+
if (!S_ISREG(st.st_mode))
1308+
return -1;
1309+
size = xsize_t(st.st_size);
1310+
i = open(filename, O_RDONLY);
1311+
if (i < 0)
1312+
goto err_ret;
1313+
data = xmalloc(size + 1);
1314+
if (st.st_size != read_in_full(i, data, size)) {
1315+
error(_("'%s': short read %s"), filename, strerror(errno));
1316+
close(i);
1317+
free(data);
1318+
return -1;
1319+
}
1320+
close(i);
1321+
data[size] = 0;
1322+
1323+
gs->buf = data;
1324+
gs->size = size;
1325+
return 0;
1326+
}
1327+
1328+
int grep_source_load(struct grep_source *gs)
1329+
{
1330+
if (gs->buf)
1331+
return 0;
1332+
1333+
switch (gs->type) {
1334+
case GREP_SOURCE_FILE:
1335+
return grep_source_load_file(gs);
1336+
case GREP_SOURCE_SHA1:
1337+
return grep_source_load_sha1(gs);
1338+
case GREP_SOURCE_BUF:
1339+
return gs->buf ? 0 : -1;
1340+
}
1341+
die("BUG: invalid grep_source type");
12121342
}

grep.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,28 @@ extern void compile_grep_patterns(struct grep_opt *opt);
129129
extern void free_grep_patterns(struct grep_opt *opt);
130130
extern int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size);
131131

132+
struct grep_source {
133+
char *name;
134+
135+
enum grep_source_type {
136+
GREP_SOURCE_SHA1,
137+
GREP_SOURCE_FILE,
138+
GREP_SOURCE_BUF,
139+
} type;
140+
void *identifier;
141+
142+
char *buf;
143+
unsigned long size;
144+
};
145+
146+
void grep_source_init(struct grep_source *gs, enum grep_source_type type,
147+
const char *name, const void *identifier);
148+
int grep_source_load(struct grep_source *gs);
149+
void grep_source_clear_data(struct grep_source *gs);
150+
void grep_source_clear(struct grep_source *gs);
151+
152+
int grep_source(struct grep_opt *opt, struct grep_source *gs);
153+
132154
extern struct grep_opt *grep_opt_dup(const struct grep_opt *opt);
133155
extern int grep_threads_ok(const struct grep_opt *opt);
134156

0 commit comments

Comments
 (0)