Skip to content

Commit 2e1dfd6

Browse files
committed
Merge branch 'cn/bom-in-gitignore'
Teach the codepaths that read .gitignore and .gitattributes files that these files, when encoded in UTF-8, may have a UTF-8 BOM at the beginning; this brings them in line with what we already do for configuration files.

* cn/bom-in-gitignore:
  - attr: skip UTF8 BOM at the beginning of the input file
  - config: use utf8_bom[] from utf8.[ch] in git_parse_source()
  - utf8-bom: introduce skip_utf8_bom() helper
  - add_excludes_from_file: clarify the bom skipping logic
  - dir: allow a BOM at the beginning of exclude files
2 parents 39a5d50 + 27547e5 commit 2e1dfd6

File tree

6 files changed

+39
-5
lines changed

6 files changed

+39
-5
lines changed

attr.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "exec_cmd.h"
1313
#include "attr.h"
1414
#include "dir.h"
15+
#include "utf8.h"
1516

1617
const char git_attr__true[] = "(builtin)true";
1718
const char git_attr__false[] = "\0(builtin)false";
@@ -379,8 +380,12 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
379380
return NULL;
380381
}
381382
res = xcalloc(1, sizeof(*res));
382-
while (fgets(buf, sizeof(buf), fp))
383-
handle_attr_line(res, buf, path, ++lineno, macro_ok);
383+
while (fgets(buf, sizeof(buf), fp)) {
384+
char *bufp = buf;
385+
if (!lineno)
386+
skip_utf8_bom(&bufp, strlen(bufp));
387+
handle_attr_line(res, bufp, path, ++lineno, macro_ok);
388+
}
384389
fclose(fp);
385390
return res;
386391
}

config.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "quote.h"
1313
#include "hashmap.h"
1414
#include "string-list.h"
15+
#include "utf8.h"
1516

1617
struct config_source {
1718
struct config_source *prev;
@@ -417,16 +418,15 @@ static int git_parse_source(config_fn_t fn, void *data)
417418
struct strbuf *var = &cf->var;
418419

419420
/* U+FEFF Byte Order Mark in UTF8 */
420-
static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
421-
const unsigned char *bomptr = utf8_bom;
421+
const char *bomptr = utf8_bom;
422422

423423
for (;;) {
424424
int c = get_next_char();
425425
if (bomptr && *bomptr) {
426426
/* We are at the file beginning; skip UTF8-encoded BOM
427427
* if present. Sane editors won't put this in on their
428428
* own, but e.g. Windows Notepad will do it happily. */
429-
if ((unsigned char) c == *bomptr) {
429+
if (c == (*bomptr & 0377)) {
430430
bomptr++;
431431
continue;
432432
} else {

dir.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "refs.h"
1313
#include "wildmatch.h"
1414
#include "pathspec.h"
15+
#include "utf8.h"
1516

1617
struct path_simplify {
1718
int len;
@@ -617,7 +618,12 @@ int add_excludes_from_file_to_list(const char *fname,
617618
}
618619

619620
el->filebuf = buf;
621+
622+
if (skip_utf8_bom(&buf, size))
623+
size -= buf - el->filebuf;
624+
620625
entry = buf;
626+
621627
for (i = 0; i < size; i++) {
622628
if (buf[i] == '\n') {
623629
if (entry != buf + i && entry[0] != '#') {

t/t7061-wtstatus-ignore.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ test_expect_success 'status untracked directory with --ignored' '
2020
test_cmp expected actual
2121
'
2222

23+
test_expect_success 'same with gitignore starting with BOM' '
24+
printf "\357\273\277ignored\n" >.gitignore &&
25+
mkdir -p untracked &&
26+
: >untracked/ignored &&
27+
: >untracked/uncommitted &&
28+
git status --porcelain --ignored >actual &&
29+
test_cmp expected actual
30+
'
31+
2332
cat >expected <<\EOF
2433
?? .gitignore
2534
?? actual

utf8.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,3 +633,14 @@ int is_hfs_dotgit(const char *path)
633633

634634
return 1;
635635
}
636+
637+
/* U+FEFF BYTE ORDER MARK encoded in UTF-8 (bytes EF BB BF), NUL-terminated. */
const char utf8_bom[] = "\357\273\277";

/*
 * If the buffer at *text, which holds len bytes, begins with a UTF-8
 * byte order mark, advance *text past the mark and return 1.
 * Otherwise leave *text untouched and return 0.
 *
 * The caller owns len: on a return of 1 it must shrink its own length
 * by the distance *text moved.  A NULL text or NULL *text is treated
 * as "no BOM present".
 */
int skip_utf8_bom(char **text, size_t len)
{
	/* Length of the mark without the terminating NUL; known at compile time. */
	const size_t bom_len = sizeof(utf8_bom) - 1;

	if (!text || !*text)
		return 0;
	/* Check len first so memcmp() never reads past a short buffer. */
	if (len < bom_len || memcmp(*text, utf8_bom, bom_len))
		return 0;
	*text += bom_len;
	return 1;
}

utf8.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ int same_encoding(const char *, const char *);
1313
__attribute__((format (printf, 2, 3)))
1414
int utf8_fprintf(FILE *, const char *, ...);
1515

16+
extern const char utf8_bom[];
17+
extern int skip_utf8_bom(char **, size_t);
18+
1619
void strbuf_add_wrapped_text(struct strbuf *buf,
1720
const char *text, int indent, int indent2, int width);
1821
void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,

0 commit comments

Comments
 (0)