Skip to content

Commit 050e334

Browse files
committed
Merge branch 'ta/fast-import-parse-path-fix'
The way "git fast-import" handles paths described in its input has been tightened up and more clearly documented.

* ta/fast-import-parse-path-fix:
  - fast-import: make comments more precise
  - fast-import: forbid escaped NUL in paths
  - fast-import: document C-style escapes for paths
  - fast-import: improve documentation for path quoting
  - fast-import: remove dead strbuf
  - fast-import: allow unquoted empty path for root
  - fast-import: directly use strbufs for paths
  - fast-import: tighten path unquoting
2 parents 33bbc21 + ab4ad1f commit 050e334

File tree

3 files changed

+560
-267
lines changed

3 files changed

+560
-267
lines changed

Documentation/git-fast-import.txt

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -630,18 +630,28 @@ in octal. Git only supports the following modes:
630630
In both formats `<path>` is the complete path of the file to be added
631631
(if not already existing) or modified (if already existing).
632632

633-
A `<path>` string must use UNIX-style directory separators (forward
634-
slash `/`), may contain any byte other than `LF`, and must not
635-
start with double quote (`"`).
636-
637-
A path can use C-style string quoting; this is accepted in all cases
638-
and mandatory if the filename starts with double quote or contains
639-
`LF`. In C-style quoting, the complete name should be surrounded with
640-
double quotes, and any `LF`, backslash, or double quote characters
641-
must be escaped by preceding them with a backslash (e.g.,
642-
`"path/with\n, \\ and \" in it"`).
643-
644-
The value of `<path>` must be in canonical form. That is it must not:
633+
A `<path>` can be written as unquoted bytes or a C-style quoted string.
634+
635+
When a `<path>` does not start with a double quote (`"`), it is an
636+
unquoted string and is parsed as literal bytes without any escape
637+
sequences. However, if the filename contains `LF` or starts with double
638+
quote, it cannot be represented as an unquoted string and must be
639+
quoted. Additionally, the source `<path>` in `filecopy` or `filerename`
640+
must be quoted if it contains SP.
641+
642+
When a `<path>` starts with a double quote (`"`), it is a C-style quoted
643+
string, where the complete filename is enclosed in a pair of double
644+
quotes and escape sequences are used. Certain characters must be escaped
645+
by preceding them with a backslash: `LF` is written as `\n`, backslash
646+
as `\\`, and double quote as `\"`. Some characters may optionally be
647+
written with escape sequences: `\a` for bell, `\b` for backspace, `\f`
648+
for form feed, `\n` for line feed, `\r` for carriage return, `\t` for
649+
horizontal tab, and `\v` for vertical tab. Any byte can be written with
650+
3-digit octal codes (e.g., `\033`). All filenames can be represented as
651+
quoted strings.
652+
653+
A `<path>` must use UNIX-style directory separators (forward slash `/`)
654+
and its value must be in canonical form. That is it must not:
645655

646656
* contain an empty directory component (e.g. `foo//bar` is invalid),
647657
* end with a directory separator (e.g. `foo/` is invalid),
@@ -651,6 +661,7 @@ The value of `<path>` must be in canonical form. That is it must not:
651661

652662
The root of the tree can be represented by an empty string as `<path>`.
653663

664+
`<path>` cannot contain NUL, either literally or escaped as `\000`.
654665
It is recommended that `<path>` always be encoded using UTF-8.
655666

656667
`filedelete`

builtin/fast-import.c

Lines changed: 84 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -2210,7 +2210,7 @@ static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const ch
22102210
*
22112211
* idnum ::= ':' bigint;
22122212
*
2213-
* Return the first character after the value in *endptr.
2213+
* Update *endptr to point to the first character after the value.
22142214
*
22152215
* Complain if the following character is not what is expected,
22162216
* either a space or end of the string.
@@ -2243,8 +2243,8 @@ static uintmax_t parse_mark_ref_eol(const char *p)
22432243
}
22442244

22452245
/*
2246-
* Parse the mark reference, demanding a trailing space. Return a
2247-
* pointer to the space.
2246+
* Parse the mark reference, demanding a trailing space. Update *p to
2247+
* point to the first character after the space.
22482248
*/
22492249
static uintmax_t parse_mark_ref_space(const char **p)
22502250
{
@@ -2258,10 +2258,62 @@ static uintmax_t parse_mark_ref_space(const char **p)
22582258
return mark;
22592259
}
22602260

2261+
/*
2262+
* Parse the path string into the strbuf. The path can either be quoted with
2263+
* escape sequences or unquoted without escape sequences. Unquoted strings may
2264+
* contain spaces only if `is_last_field` is nonzero; otherwise, it stops
2265+
* parsing at the first space.
2266+
*/
2267+
static void parse_path(struct strbuf *sb, const char *p, const char **endp,
2268+
int is_last_field, const char *field)
2269+
{
2270+
if (*p == '"') {
2271+
if (unquote_c_style(sb, p, endp))
2272+
die("Invalid %s: %s", field, command_buf.buf);
2273+
if (strlen(sb->buf) != sb->len)
2274+
die("NUL in %s: %s", field, command_buf.buf);
2275+
} else {
2276+
/*
2277+
* Unless we are parsing the last field of a line,
2278+
* SP is the end of this field.
2279+
*/
2280+
*endp = is_last_field
2281+
? p + strlen(p)
2282+
: strchrnul(p, ' ');
2283+
strbuf_add(sb, p, *endp - p);
2284+
}
2285+
}
2286+
2287+
/*
2288+
* Parse the path string into the strbuf, and complain if this is not the end of
2289+
* the string. Unquoted strings may contain spaces.
2290+
*/
2291+
static void parse_path_eol(struct strbuf *sb, const char *p, const char *field)
2292+
{
2293+
const char *end;
2294+
2295+
parse_path(sb, p, &end, 1, field);
2296+
if (*end)
2297+
die("Garbage after %s: %s", field, command_buf.buf);
2298+
}
2299+
2300+
/*
2301+
* Parse the path string into the strbuf, and ensure it is followed by a space.
2302+
* Unquoted strings may not contain spaces. Update *endp to point to the first
2303+
* character after the space.
2304+
*/
2305+
static void parse_path_space(struct strbuf *sb, const char *p,
2306+
const char **endp, const char *field)
2307+
{
2308+
parse_path(sb, p, endp, 0, field);
2309+
if (**endp != ' ')
2310+
die("Missing space after %s: %s", field, command_buf.buf);
2311+
(*endp)++;
2312+
}
2313+
22612314
static void file_change_m(const char *p, struct branch *b)
22622315
{
2263-
static struct strbuf uq = STRBUF_INIT;
2264-
const char *endp;
2316+
static struct strbuf path = STRBUF_INIT;
22652317
struct object_entry *oe;
22662318
struct object_id oid;
22672319
uint16_t mode, inline_data = 0;
@@ -2298,16 +2350,12 @@ static void file_change_m(const char *p, struct branch *b)
22982350
die("Missing space after SHA1: %s", command_buf.buf);
22992351
}
23002352

2301-
strbuf_reset(&uq);
2302-
if (!unquote_c_style(&uq, p, &endp)) {
2303-
if (*endp)
2304-
die("Garbage after path in: %s", command_buf.buf);
2305-
p = uq.buf;
2306-
}
2353+
strbuf_reset(&path);
2354+
parse_path_eol(&path, p, "path");
23072355

23082356
/* Git does not track empty, non-toplevel directories. */
2309-
if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *p) {
2310-
tree_content_remove(&b->branch_tree, p, NULL, 0);
2357+
if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *path.buf) {
2358+
tree_content_remove(&b->branch_tree, path.buf, NULL, 0);
23112359
return;
23122360
}
23132361

@@ -2328,10 +2376,6 @@ static void file_change_m(const char *p, struct branch *b)
23282376
if (S_ISDIR(mode))
23292377
die("Directories cannot be specified 'inline': %s",
23302378
command_buf.buf);
2331-
if (p != uq.buf) {
2332-
strbuf_addstr(&uq, p);
2333-
p = uq.buf;
2334-
}
23352379
while (read_next_command() != EOF) {
23362380
const char *v;
23372381
if (skip_prefix(command_buf.buf, "cat-blob ", &v))
@@ -2357,82 +2401,55 @@ static void file_change_m(const char *p, struct branch *b)
23572401
command_buf.buf);
23582402
}
23592403

2360-
if (!*p) {
2404+
if (!*path.buf) {
23612405
tree_content_replace(&b->branch_tree, &oid, mode, NULL);
23622406
return;
23632407
}
2364-
tree_content_set(&b->branch_tree, p, &oid, mode, NULL);
2408+
tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL);
23652409
}
23662410

23672411
static void file_change_d(const char *p, struct branch *b)
23682412
{
2369-
static struct strbuf uq = STRBUF_INIT;
2370-
const char *endp;
2413+
static struct strbuf path = STRBUF_INIT;
23712414

2372-
strbuf_reset(&uq);
2373-
if (!unquote_c_style(&uq, p, &endp)) {
2374-
if (*endp)
2375-
die("Garbage after path in: %s", command_buf.buf);
2376-
p = uq.buf;
2377-
}
2378-
tree_content_remove(&b->branch_tree, p, NULL, 1);
2415+
strbuf_reset(&path);
2416+
parse_path_eol(&path, p, "path");
2417+
tree_content_remove(&b->branch_tree, path.buf, NULL, 1);
23792418
}
23802419

2381-
static void file_change_cr(const char *s, struct branch *b, int rename)
2420+
static void file_change_cr(const char *p, struct branch *b, int rename)
23822421
{
2383-
const char *d;
2384-
static struct strbuf s_uq = STRBUF_INIT;
2385-
static struct strbuf d_uq = STRBUF_INIT;
2386-
const char *endp;
2422+
static struct strbuf source = STRBUF_INIT;
2423+
static struct strbuf dest = STRBUF_INIT;
23872424
struct tree_entry leaf;
23882425

2389-
strbuf_reset(&s_uq);
2390-
if (!unquote_c_style(&s_uq, s, &endp)) {
2391-
if (*endp != ' ')
2392-
die("Missing space after source: %s", command_buf.buf);
2393-
} else {
2394-
endp = strchr(s, ' ');
2395-
if (!endp)
2396-
die("Missing space after source: %s", command_buf.buf);
2397-
strbuf_add(&s_uq, s, endp - s);
2398-
}
2399-
s = s_uq.buf;
2400-
2401-
endp++;
2402-
if (!*endp)
2403-
die("Missing dest: %s", command_buf.buf);
2404-
2405-
d = endp;
2406-
strbuf_reset(&d_uq);
2407-
if (!unquote_c_style(&d_uq, d, &endp)) {
2408-
if (*endp)
2409-
die("Garbage after dest in: %s", command_buf.buf);
2410-
d = d_uq.buf;
2411-
}
2426+
strbuf_reset(&source);
2427+
parse_path_space(&source, p, &p, "source");
2428+
strbuf_reset(&dest);
2429+
parse_path_eol(&dest, p, "dest");
24122430

24132431
memset(&leaf, 0, sizeof(leaf));
24142432
if (rename)
2415-
tree_content_remove(&b->branch_tree, s, &leaf, 1);
2433+
tree_content_remove(&b->branch_tree, source.buf, &leaf, 1);
24162434
else
2417-
tree_content_get(&b->branch_tree, s, &leaf, 1);
2435+
tree_content_get(&b->branch_tree, source.buf, &leaf, 1);
24182436
if (!leaf.versions[1].mode)
2419-
die("Path %s not in branch", s);
2420-
if (!*d) { /* C "path/to/subdir" "" */
2437+
die("Path %s not in branch", source.buf);
2438+
if (!*dest.buf) { /* C "path/to/subdir" "" */
24212439
tree_content_replace(&b->branch_tree,
24222440
&leaf.versions[1].oid,
24232441
leaf.versions[1].mode,
24242442
leaf.tree);
24252443
return;
24262444
}
2427-
tree_content_set(&b->branch_tree, d,
2445+
tree_content_set(&b->branch_tree, dest.buf,
24282446
&leaf.versions[1].oid,
24292447
leaf.versions[1].mode,
24302448
leaf.tree);
24312449
}
24322450

24332451
static void note_change_n(const char *p, struct branch *b, unsigned char *old_fanout)
24342452
{
2435-
static struct strbuf uq = STRBUF_INIT;
24362453
struct object_entry *oe;
24372454
struct branch *s;
24382455
struct object_id oid, commit_oid;
@@ -2497,10 +2514,6 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
24972514
die("Invalid ref name or SHA1 expression: %s", p);
24982515

24992516
if (inline_data) {
2500-
if (p != uq.buf) {
2501-
strbuf_addstr(&uq, p);
2502-
p = uq.buf;
2503-
}
25042517
read_next_command();
25052518
parse_and_store_blob(&last_blob, &oid, 0);
25062519
} else if (oe) {
@@ -3152,6 +3165,7 @@ static void print_ls(int mode, const unsigned char *hash, const char *path)
31523165

31533166
static void parse_ls(const char *p, struct branch *b)
31543167
{
3168+
static struct strbuf path = STRBUF_INIT;
31553169
struct tree_entry *root = NULL;
31563170
struct tree_entry leaf = {NULL};
31573171

@@ -3168,25 +3182,17 @@ static void parse_ls(const char *p, struct branch *b)
31683182
root->versions[1].mode = S_IFDIR;
31693183
load_tree(root);
31703184
}
3171-
if (*p == '"') {
3172-
static struct strbuf uq = STRBUF_INIT;
3173-
const char *endp;
3174-
strbuf_reset(&uq);
3175-
if (unquote_c_style(&uq, p, &endp))
3176-
die("Invalid path: %s", command_buf.buf);
3177-
if (*endp)
3178-
die("Garbage after path in: %s", command_buf.buf);
3179-
p = uq.buf;
3180-
}
3181-
tree_content_get(root, p, &leaf, 1);
3185+
strbuf_reset(&path);
3186+
parse_path_eol(&path, p, "path");
3187+
tree_content_get(root, path.buf, &leaf, 1);
31823188
/*
31833189
* A directory in preparation would have a sha1 of zero
31843190
* until it is saved. Save, for simplicity.
31853191
*/
31863192
if (S_ISDIR(leaf.versions[1].mode))
31873193
store_tree(&leaf);
31883194

3189-
print_ls(leaf.versions[1].mode, leaf.versions[1].oid.hash, p);
3195+
print_ls(leaf.versions[1].mode, leaf.versions[1].oid.hash, path.buf);
31903196
if (leaf.tree)
31913197
release_tree_content_recursive(leaf.tree);
31923198
if (!b || root != &b->branch_tree)

0 commit comments

Comments
 (0)