Skip to content

Commit 335ec3b

Browse files
peff authored and gitster committed
grep: allow to use textconv filters
Recently and not so recently, we made sure that log/grep type operations use textconv filters when a user-facing diff would do the same:

- ef90ab6 (pickaxe: use textconv for -S counting, 2012-10-28)
- b1c2f57 (diff_grep: use textconv buffers for add/deleted files, 2012-10-28)
- 0508fe5 (combine-diff: respect textconv attributes, 2011-05-23)

"git grep" currently does not use textconv filters at all, that is, neither for displaying the match and context nor for the actual grepping, even when requested by --textconv.

Introduce an option "--textconv" which makes git grep use any configured textconv filters for grepping and output purposes. It is off by default.

Signed-off-by: Jeff King <[email protected]>
Signed-off-by: Michael J Gruber <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 97f6a9c commit 335ec3b

File tree

5 files changed

+102
-16
lines changed

5 files changed

+102
-16
lines changed

Documentation/git-grep.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ git-grep - Print lines matching a pattern
99
SYNOPSIS
1010
--------
1111
[verse]
12-
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
12+
'git grep' [-a | --text] [-I] [--textconv] [-i | --ignore-case] [-w | --word-regexp]
1313
[-v | --invert-match] [-h|-H] [--full-name]
1414
[-E | --extended-regexp] [-G | --basic-regexp]
1515
[-P | --perl-regexp]
@@ -80,6 +80,13 @@ OPTIONS
8080
--text::
8181
Process binary files as if they were text.
8282

83+
--textconv::
84+
Honor textconv filter settings.
85+
86+
--no-textconv::
87+
Do not honor textconv filter settings.
88+
This is the default.
89+
8390
-i::
8491
--ignore-case::
8592
Ignore case differences between the patterns and the

builtin/grep.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
659659
OPT_SET_INT('I', NULL, &opt.binary,
660660
N_("don't match patterns in binary files"),
661661
GREP_BINARY_NOMATCH),
662+
OPT_BOOL(0, "textconv", &opt.allow_textconv,
663+
N_("process binary files with textconv filters")),
662664
{ OPTION_INTEGER, 0, "max-depth", &opt.max_depth, N_("depth"),
663665
N_("descend at most <depth> levels"), PARSE_OPT_NONEG,
664666
NULL, 1 },

grep.c

Lines changed: 86 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#include "grep.h"
33
#include "userdiff.h"
44
#include "xdiff-interface.h"
5+
#include "diff.h"
6+
#include "diffcore.h"
57

68
static int grep_source_load(struct grep_source *gs);
79
static int grep_source_is_binary(struct grep_source *gs);
@@ -1322,6 +1324,58 @@ static void std_output(struct grep_opt *opt, const void *buf, size_t size)
13221324
fwrite(buf, size, 1, stdout);
13231325
}
13241326

1327+
/*
 * Load the data to be searched into gs, passing it through the textconv
 * filter of "driver" when one is configured.
 *
 * With no driver, or a driver without a textconv command, this simply
 * defers to grep_source_load() and returns its result. Otherwise a temporary
 * diff_filespec is built for gs->path (from gs->identifier for
 * GREP_SOURCE_SHA1, from null_sha1 for GREP_SOURCE_FILE; any other source
 * type dies), fill_textconv() is run under the grep read lock, and the
 * converted buffer replaces whatever gs previously held. Returns 0 on that
 * path.
 *
 * NOTE(review): gs presumably takes ownership of the converted buffer and
 * releases it via grep_source_clear_data() -- confirm against grep_source's
 * cleanup code.
 */
static int fill_textconv_grep(struct userdiff_driver *driver,
1328+
struct grep_source *gs)
1329+
{
1330+
struct diff_filespec *df;
1331+
char *buf;
1332+
size_t size;
1333+
1334+
if (!driver || !driver->textconv)
1335+
return grep_source_load(gs);
1336+
1337+
/*
1338+
* The textconv interface is intimately tied to diff_filespecs, so we
1339+
* have to pretend to be one. If we could unify the grep_source
1340+
* and diff_filespec structs, this mess could just go away.
1341+
*/
1342+
df = alloc_filespec(gs->path);
1343+
switch (gs->type) {
1344+
case GREP_SOURCE_SHA1:
1345+
fill_filespec(df, gs->identifier, 1, 0100644);
1346+
break;
1347+
case GREP_SOURCE_FILE:
1348+
fill_filespec(df, null_sha1, 0, 0100644);
1349+
break;
1350+
default:
1351+
die("BUG: attempt to textconv something without a path?");
1352+
}
1353+
1354+
/*
1355+
* fill_textconv is not remotely thread-safe; it may load objects
1356+
* behind the scenes, and it modifies the global diff tempfile
1357+
* structure.
1358+
*/
1359+
grep_read_lock();
1360+
size = fill_textconv(driver, df, &buf);
1361+
grep_read_unlock();
1362+
free_filespec(df);
1363+
1364+
/*
1365+
* The normal fill_textconv usage by the diff machinery would just keep
1366+
* the textconv'd buf separate from the diff_filespec. But much of the
1367+
* grep code passes around a grep_source and assumes that its "buf"
1368+
* pointer is the beginning of the thing we are searching. So let's
1369+
* install our textconv'd version into the grep_source, taking care not
1370+
* to leak any existing buffer.
1371+
*/
1372+
grep_source_clear_data(gs);
1373+
gs->buf = buf;
1374+
gs->size = size;
1375+
1376+
return 0;
1377+
}
1378+
13251379
static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
13261380
{
13271381
char *bol;
@@ -1332,6 +1386,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
13321386
unsigned count = 0;
13331387
int try_lookahead = 0;
13341388
int show_function = 0;
1389+
struct userdiff_driver *textconv = NULL;
13351390
enum grep_context ctx = GREP_CONTEXT_HEAD;
13361391
xdemitconf_t xecfg;
13371392

@@ -1353,27 +1408,44 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
13531408
}
13541409
opt->last_shown = 0;
13551410

1356-
switch (opt->binary) {
1357-
case GREP_BINARY_DEFAULT:
1358-
if (grep_source_is_binary(gs))
1359-
binary_match_only = 1;
1360-
break;
1361-
case GREP_BINARY_NOMATCH:
1362-
if (grep_source_is_binary(gs))
1363-
return 0; /* Assume unmatch */
1364-
break;
1365-
case GREP_BINARY_TEXT:
1366-
break;
1367-
default:
1368-
die("bug: unknown binary handling mode");
1411+
if (opt->allow_textconv) {
1412+
grep_source_load_driver(gs);
1413+
/*
1414+
* We might set up the shared textconv cache data here, which
1415+
* is not thread-safe.
1416+
*/
1417+
grep_attr_lock();
1418+
textconv = userdiff_get_textconv(gs->driver);
1419+
grep_attr_unlock();
1420+
}
1421+
1422+
/*
1423+
* We know the result of a textconv is text, so we only have to care
1424+
* about binary handling if we are not using it.
1425+
*/
1426+
if (!textconv) {
1427+
switch (opt->binary) {
1428+
case GREP_BINARY_DEFAULT:
1429+
if (grep_source_is_binary(gs))
1430+
binary_match_only = 1;
1431+
break;
1432+
case GREP_BINARY_NOMATCH:
1433+
if (grep_source_is_binary(gs))
1434+
return 0; /* Assume unmatch */
1435+
break;
1436+
case GREP_BINARY_TEXT:
1437+
break;
1438+
default:
1439+
die("bug: unknown binary handling mode");
1440+
}
13691441
}
13701442

13711443
memset(&xecfg, 0, sizeof(xecfg));
13721444
opt->priv = &xecfg;
13731445

13741446
try_lookahead = should_lookahead(opt);
13751447

1376-
if (grep_source_load(gs) < 0)
1448+
if (fill_textconv_grep(textconv, gs) < 0)
13771449
return 0;
13781450

13791451
bol = gs->buf;

grep.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ struct grep_opt {
107107
#define GREP_BINARY_NOMATCH 1
108108
#define GREP_BINARY_TEXT 2
109109
int binary;
110+
int allow_textconv;
110111
int extended;
111112
int use_reflog_filter;
112113
int pcre;

t/t7008-grep-binary.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ test_expect_success 'grep does not honor textconv' '
160160
test_must_fail git grep Qfile
161161
'
162162

163-
test_expect_failure 'grep --textconv honors textconv' '
163+
test_expect_success 'grep --textconv honors textconv' '
164164
echo "a:binaryQfile" >expect &&
165165
git grep --textconv Qfile >actual &&
166166
test_cmp expect actual
@@ -176,4 +176,8 @@ test_expect_failure 'grep --textconv blob honors textconv' '
176176
test_cmp expect actual
177177
'
178178

179+
test_expect_success 'grep --no-textconv blob does not honor textconv' '
180+
test_must_fail git grep --no-textconv Qfile HEAD:a
181+
'
182+
179183
test_done

0 commit comments

Comments
 (0)