Skip to content

Commit a7630bd

Browse files
tboegigitster
authored andcommitted
ls-files: add eol diagnostics
When working in a cross-platform environment, a user may want to check if text files are stored normalized in the repository and if .gitattributes are set appropriately. Make it possible to let Git show the line endings in the index and in the working tree and the effective text/eol attributes. The end-of-line information ("eolinfo") is shown like this: "-text" binary (or with bare CR) file "none" text file without any EOL "lf" text file with LF "crlf" text file with CRLF "mixed" text file with mixed line endings. The effective text/eol attribute is one of these: "", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf" git ls-files --eol gives an output like this: i/none w/none attr/text=auto t/t5100/empty i/-text w/-text attr/-text t/test-binary-2.png i/lf w/lf attr/text eol=lf t/t5100/rfc2047-info-0007 i/lf w/crlf attr/text eol=crlf doit.bat i/mixed w/mixed attr/ locale/XX.po to show what eol convention is used in the data in the index ('i'), and in the working tree ('w'), and what attribute is in effect, for each path that is shown. Add test cases in t0027. Helped-By: Eric Sunshine <[email protected]> Signed-off-by: Torsten Bögershausen <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0c83680 commit a7630bd

File tree

5 files changed

+237
-49
lines changed

5 files changed

+237
-49
lines changed

Documentation/git-ls-files.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ SYNOPSIS
1212
'git ls-files' [-z] [-t] [-v]
1313
(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])*
1414
(-[c|d|o|i|s|u|k|m])*
15+
[--eol]
1516
[-x <pattern>|--exclude=<pattern>]
1617
[-X <file>|--exclude-from=<file>]
1718
[--exclude-per-directory=<file>]
@@ -147,6 +148,24 @@ a space) at the start of each line:
147148
possible for manual inspection; the exact format may change at
148149
any time.
149150

151+
--eol::
152+
Show <eolinfo> and <eolattr> of files.
153+
<eolinfo> is the file content identification used by Git when
154+
the "text" attribute is "auto" (or not set and core.autocrlf is not false).
155+
<eolinfo> is either "-text", "none", "lf", "crlf", "mixed" or "".
156+
+
157+
"" means the file is not a regular file, it is not in the index or
158+
not accessible in the working tree.
159+
+
160+
<eolattr> is the attribute that is used when checking out or committing,
161+
it is either "", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf".
162+
Note: Currently Git does not support "text=auto eol=lf" or "text=auto eol=crlf",
163+
that may change in the future.
164+
+
165+
Both the <eolinfo> in the index ("i/<eolinfo>")
166+
and in the working tree ("w/<eolinfo>") are shown for regular files,
167+
followed by the ("attr/<eolattr>").
168+
150169
\--::
151170
Do not interpret any more arguments as options.
152171

@@ -161,6 +180,9 @@ which case it outputs:
161180

162181
[<tag> ]<mode> <object> <stage> <file>
163182

183+
'git ls-files --eol' will show
184+
i/<eolinfo><SPACES>w/<eolinfo><SPACES>attr/<eolattr><SPACE*><TAB><file>
185+
164186
'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine
165187
detailed information on unmerged paths.
166188

builtin/ls-files.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ static int show_killed;
2727
static int show_valid_bit;
2828
static int line_terminator = '\n';
2929
static int debug_mode;
30+
static int show_eol;
3031

3132
static const char *prefix;
3233
static int max_prefix_len;
@@ -47,6 +48,23 @@ static const char *tag_modified = "";
4748
static const char *tag_skip_worktree = "";
4849
static const char *tag_resolve_undo = "";
4950

51+
static void write_eolinfo(const struct cache_entry *ce, const char *path)
52+
{
53+
if (!show_eol)
54+
return;
55+
else {
56+
struct stat st;
57+
const char *i_txt = "";
58+
const char *w_txt = "";
59+
const char *a_txt = get_convert_attr_ascii(path);
60+
if (ce && S_ISREG(ce->ce_mode))
61+
i_txt = get_cached_convert_stats_ascii(ce->name);
62+
if (!lstat(path, &st) && S_ISREG(st.st_mode))
63+
w_txt = get_wt_convert_stats_ascii(path);
64+
printf("i/%-5s w/%-5s attr/%-17s\t", i_txt, w_txt, a_txt);
65+
}
66+
}
67+
5068
static void write_name(const char *name)
5169
{
5270
/*
@@ -68,6 +86,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
6886
return;
6987

7088
fputs(tag, stdout);
89+
write_eolinfo(NULL, ent->name);
7190
write_name(ent->name);
7291
}
7392

@@ -170,6 +189,7 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
170189
find_unique_abbrev(ce->sha1,abbrev),
171190
ce_stage(ce));
172191
}
192+
write_eolinfo(ce, ce->name);
173193
write_name(ce->name);
174194
if (debug_mode) {
175195
const struct stat_data *sd = &ce->ce_stat_data;
@@ -433,6 +453,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
433453
OPT_BIT(0, "directory", &dir.flags,
434454
N_("show 'other' directories' names only"),
435455
DIR_SHOW_OTHER_DIRECTORIES),
456+
OPT_BOOL(0, "eol", &show_eol, N_("show line endings of files")),
436457
OPT_NEGBIT(0, "empty-directory", &dir.flags,
437458
N_("don't show empty directories"),
438459
DIR_HIDE_EMPTY_DIRECTORIES),

convert.c

Lines changed: 91 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313
* translation when the "text" attribute or "auto_crlf" option is set.
1414
*/
1515

16+
/* Stat bits: When BIN is set, the txt bits are unset */
17+
#define CONVERT_STAT_BITS_TXT_LF 0x1
18+
#define CONVERT_STAT_BITS_TXT_CRLF 0x2
19+
#define CONVERT_STAT_BITS_BIN 0x4
20+
1621
enum crlf_action {
1722
CRLF_GUESS = -1,
1823
CRLF_BINARY = 0,
@@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
7580

7681
/*
7782
* The same heuristics as diff.c::mmfile_is_binary()
83+
* We treat files with bare CR as binary
7884
*/
79-
static int is_binary(unsigned long size, struct text_stat *stats)
85+
static int convert_is_binary(unsigned long size, const struct text_stat *stats)
8086
{
81-
87+
if (stats->cr != stats->crlf)
88+
return 1;
8289
if (stats->nul)
8390
return 1;
8491
if ((stats->printable >> 7) < stats->nonprintable)
8592
return 1;
86-
/*
87-
* Other heuristics? Average line length might be relevant,
88-
* as might LF vs CR vs CRLF counts..
89-
*
90-
* NOTE! It might be normal to have a low ratio of CRLF to LF
91-
* (somebody starts with a LF-only file and edits it with an editor
92-
* that adds CRLF only to lines that are added..). But do we
93-
* want to support CR-only? Probably not.
94-
*/
9593
return 0;
9694
}
9795

96+
static unsigned int gather_convert_stats(const char *data, unsigned long size)
97+
{
98+
struct text_stat stats;
99+
if (!data || !size)
100+
return 0;
101+
gather_stats(data, size, &stats);
102+
if (convert_is_binary(size, &stats))
103+
return CONVERT_STAT_BITS_BIN;
104+
else if (stats.crlf && stats.crlf == stats.lf)
105+
return CONVERT_STAT_BITS_TXT_CRLF;
106+
else if (stats.crlf && stats.lf)
107+
return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF;
108+
else if (stats.lf)
109+
return CONVERT_STAT_BITS_TXT_LF;
110+
else
111+
return 0;
112+
}
113+
114+
static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
115+
{
116+
unsigned int convert_stats = gather_convert_stats(data, size);
117+
118+
if (convert_stats & CONVERT_STAT_BITS_BIN)
119+
return "-text";
120+
switch (convert_stats) {
121+
case CONVERT_STAT_BITS_TXT_LF:
122+
return "lf";
123+
case CONVERT_STAT_BITS_TXT_CRLF:
124+
return "crlf";
125+
case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF:
126+
return "mixed";
127+
default:
128+
return "none";
129+
}
130+
}
131+
132+
/*
 * Return the <eolinfo> word ("-text", "none", "lf", "crlf" or "mixed")
 * for the blob that read_blob_data_from_cache() yields for "path".
 *
 * The result points to a string literal, so it stays valid after the
 * blob buffer is freed, and the caller must not free it.
 */
const char *get_cached_convert_stats_ascii(const char *path)
{
	const char *ret;
	/*
	 * Start from 0 so that a defined value is passed on below.
	 * NOTE(review): assumes read_blob_data_from_cache() may return NULL
	 * without storing to *size -- confirm against its definition.
	 */
	unsigned long sz = 0;
	void *data = read_blob_data_from_cache(path, &sz);

	ret = gather_convert_stats_ascii(data, sz);
	free(data);
	return ret;
}
141+
142+
const char *get_wt_convert_stats_ascii(const char *path)
143+
{
144+
const char *ret = "";
145+
struct strbuf sb = STRBUF_INIT;
146+
if (strbuf_read_file(&sb, path, 0) >= 0)
147+
ret = gather_convert_stats_ascii(sb.buf, sb.len);
148+
strbuf_release(&sb);
149+
return ret;
150+
}
151+
98152
static enum eol output_eol(enum crlf_action crlf_action)
99153
{
100154
switch (crlf_action) {
@@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
187241
gather_stats(src, len, &stats);
188242

189243
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
190-
/*
191-
* We're currently not going to even try to convert stuff
192-
* that has bare CR characters. Does anybody do that crazy
193-
* stuff?
194-
*/
195-
if (stats.cr != stats.crlf)
196-
return 0;
197-
198-
/*
199-
* And add some heuristics for binary vs text, of course...
200-
*/
201-
if (is_binary(len, &stats))
244+
if (convert_is_binary(len, &stats))
202245
return 0;
203246

204247
if (crlf_action == CRLF_GUESS) {
@@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
277320
return 0;
278321
}
279322

280-
/* If we have any bare CR characters, we're not going to touch it */
281-
if (stats.cr != stats.crlf)
282-
return 0;
283-
284-
if (is_binary(len, &stats))
323+
if (convert_is_binary(len, &stats))
285324
return 0;
286325
}
287326

@@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
777816
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
778817
}
779818

819+
const char *get_convert_attr_ascii(const char *path)
820+
{
821+
struct conv_attrs ca;
822+
enum crlf_action crlf_action;
823+
824+
convert_attrs(&ca, path);
825+
crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
826+
switch (crlf_action) {
827+
case CRLF_GUESS:
828+
return "";
829+
case CRLF_BINARY:
830+
return "-text";
831+
case CRLF_TEXT:
832+
return "text";
833+
case CRLF_INPUT:
834+
return "text eol=lf";
835+
case CRLF_CRLF:
836+
return "text=auto eol=crlf";
837+
case CRLF_AUTO:
838+
return "text=auto";
839+
}
840+
return "";
841+
}
842+
780843
int convert_to_git(const char *path, const char *src, size_t len,
781844
struct strbuf *dst, enum safe_crlf checksafe)
782845
{

convert.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ enum eol {
3232
};
3333

3434
extern enum eol core_eol;
35+
extern const char *get_cached_convert_stats_ascii(const char *path);
36+
extern const char *get_wt_convert_stats_ascii(const char *path);
37+
extern const char *get_convert_attr_ascii(const char *path);
3538

3639
/* returns 1 if *dst was used */
3740
extern int convert_to_git(const char *path, const char *src, size_t len,

0 commit comments

Comments
 (0)