Skip to content

Commit eed1fbe

Browse files
peffgitster
authored andcommitted
dir.c: always copy input to add_pattern()
The add_pattern() function has a subtle and undocumented gotcha: the pattern string you pass in must remain valid as long as the pattern_list is in use (and nor do we take ownership of it). This is easy to get wrong, causing either subtle bugs (because you free or reuse the string buffer) or leaks (because you copy the string, but don't track ownership separately). All of this "pattern" code was originally the "exclude" mechanism. So this _usually_ works OK because you add entries in one of two ways: 1. From the command-line (e.g., "--exclude"), in which case we're pointing to an argv entry which remains valid for the lifetime of the program. 2. From a file (e.g., ".gitignore"), in which case we read the whole file into a buffer, attach it to the pattern_list's "filebuf" entry, then parse the buffer in-place (adding NULs). The strings point into the filebuf, which is cleaned up when the whole pattern_list goes away. But other code, like sparse-checkout, reads individual lines from stdin and passes them one by one to add_pattern(), leaking each. We could fix this by refactoring it to take in the whole buffer at once, like (2) above, and stuff it in "filebuf". But given how subtle the interface is, let's just fix it to always copy the string. That seems at first like we'd be wasting extra memory, but we can mitigate that: a. The path_pattern struct already uses a FLEXPTR, since we sometimes make a copy (when we see "foo/", we strip off the trailing slash, requiring a modifiable copy of the string). Since we'll now always embed the string inside the struct, we can switch to the regular FLEX_ARRAY pattern, saving us 8 bytes of pointer. So patterns with a trailing slash and ones under 8 bytes actually get smaller. b. Now that we don't need the original string to hang around, we can get rid of the "filebuf" mechanism entirely, and just free the file contents after parsing. Since files are the sources we'd expect to have the largest pattern sets, we should mostly break even on stuffing the same data into the individual structs. This patch just adjusts the add_pattern() interface; it doesn't fix any leaky callers yet. Signed-off-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 4c844c2 commit eed1fbe

File tree

2 files changed

+7
-14
lines changed

2 files changed

+7
-14
lines changed

dir.c

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -925,12 +925,7 @@ void add_pattern(const char *string, const char *base,
925925
int nowildcardlen;
926926

927927
parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);
928-
if (flags & PATTERN_FLAG_MUSTBEDIR) {
929-
FLEXPTR_ALLOC_MEM(pattern, pattern, string, patternlen);
930-
} else {
931-
pattern = xmalloc(sizeof(*pattern));
932-
pattern->pattern = string;
933-
}
928+
FLEX_ALLOC_MEM(pattern, pattern, string, patternlen);
934929
pattern->patternlen = patternlen;
935930
pattern->nowildcardlen = nowildcardlen;
936931
pattern->base = base;
@@ -972,7 +967,6 @@ void clear_pattern_list(struct pattern_list *pl)
972967
for (i = 0; i < pl->nr; i++)
973968
free(pl->patterns[i]);
974969
free(pl->patterns);
975-
free(pl->filebuf);
976970
clear_pattern_entry_hashmap(&pl->recursive_hashmap);
977971
clear_pattern_entry_hashmap(&pl->parent_hashmap);
978972

@@ -1166,23 +1160,23 @@ static int add_patterns(const char *fname, const char *base, int baselen,
11661160
}
11671161

11681162
add_patterns_from_buffer(buf, size, base, baselen, pl);
1163+
free(buf);
11691164
return 0;
11701165
}
11711166

11721167
static int add_patterns_from_buffer(char *buf, size_t size,
11731168
const char *base, int baselen,
11741169
struct pattern_list *pl)
11751170
{
1171+
char *orig = buf;
11761172
int i, lineno = 1;
11771173
char *entry;
11781174

11791175
hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0);
11801176
hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0);
11811177

1182-
pl->filebuf = buf;
1183-
11841178
if (skip_utf8_bom(&buf, size))
1185-
size -= buf - pl->filebuf;
1179+
size -= buf - orig;
11861180

11871181
entry = buf;
11881182

@@ -1222,6 +1216,7 @@ int add_patterns_from_blob_to_list(
12221216
return r;
12231217

12241218
add_patterns_from_buffer(buf, size, base, baselen, pl);
1219+
free(buf);
12251220
return 0;
12261221
}
12271222

dir.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ struct path_pattern {
6262
*/
6363
struct pattern_list *pl;
6464

65-
const char *pattern;
6665
int patternlen;
6766
int nowildcardlen;
6867
const char *base;
@@ -74,6 +73,8 @@ struct path_pattern {
7473
* and from -1 decrementing for patterns from CLI args.
7574
*/
7675
int srcpos;
76+
77+
char pattern[FLEX_ARRAY];
7778
};
7879

7980
/* used for hashmaps for cone patterns */
@@ -94,9 +95,6 @@ struct pattern_list {
9495
int nr;
9596
int alloc;
9697

97-
/* remember pointer to exclude file contents so we can free() */
98-
char *filebuf;
99-
10098
/* origin of list, e.g. path to filename, or descriptive string */
10199
const char *src;
102100

0 commit comments

Comments
 (0)