Skip to content

Commit 05f1539

Browse files
committed
Merge branch 'tb/ls-files-eol'
"git ls-files" learned a new "--eol" option to help diagnose end-of-line problems. * tb/ls-files-eol: ls-files: add eol diagnostics
2 parents 1cb3ed3 + a7630bd commit 05f1539

File tree

5 files changed

+237
-49
lines changed

5 files changed

+237
-49
lines changed

Documentation/git-ls-files.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ SYNOPSIS
1212
'git ls-files' [-z] [-t] [-v]
1313
(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])*
1414
(-[c|d|o|i|s|u|k|m])*
15+
[--eol]
1516
[-x <pattern>|--exclude=<pattern>]
1617
[-X <file>|--exclude-from=<file>]
1718
[--exclude-per-directory=<file>]
@@ -147,6 +148,24 @@ a space) at the start of each line:
147148
possible for manual inspection; the exact format may change at
148149
any time.
149150

151+
--eol::
152+
Show <eolinfo> and <eolattr> of files.
153+
<eolinfo> is the file content identification used by Git when
154+
the "text" attribute is "auto" (or not set and core.autocrlf is not false).
155+
<eolinfo> is either "-text", "none", "lf", "crlf", "mixed" or "".
156+
+
157+
"" means the file is not a regular file, it is not in the index or
158+
not accessible in the working tree.
159+
+
160+
<eolattr> is the attribute that is used when checking out or committing,
161+
it is either "", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf".
162+
Note: Currently Git does not support "text=auto eol=lf" or "text=auto eol=crlf",
163+
that may change in the future.
164+
+
165+
Both the <eolinfo> in the index ("i/<eolinfo>")
166+
and in the working tree ("w/<eolinfo>") are shown for regular files,
167+
followed by the <eolattr> ("attr/<eolattr>").
168+
150169
\--::
151170
Do not interpret any more arguments as options.
152171

@@ -161,6 +180,9 @@ which case it outputs:
161180

162181
[<tag> ]<mode> <object> <stage> <file>
163182

183+
'git ls-files --eol' will show
184+
i/<eolinfo><SPACES>w/<eolinfo><SPACES>attr/<eolattr><SPACE*><TAB><file>
185+
164186
'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine
165187
detailed information on unmerged paths.
166188

builtin/ls-files.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ static int show_killed;
2727
static int show_valid_bit;
2828
static int line_terminator = '\n';
2929
static int debug_mode;
30+
static int show_eol;
3031

3132
static const char *prefix;
3233
static int max_prefix_len;
@@ -47,6 +48,23 @@ static const char *tag_modified = "";
4748
static const char *tag_skip_worktree = "";
4849
static const char *tag_resolve_undo = "";
4950

51+
static void write_eolinfo(const struct cache_entry *ce, const char *path)
52+
{
53+
if (!show_eol)
54+
return;
55+
else {
56+
struct stat st;
57+
const char *i_txt = "";
58+
const char *w_txt = "";
59+
const char *a_txt = get_convert_attr_ascii(path);
60+
if (ce && S_ISREG(ce->ce_mode))
61+
i_txt = get_cached_convert_stats_ascii(ce->name);
62+
if (!lstat(path, &st) && S_ISREG(st.st_mode))
63+
w_txt = get_wt_convert_stats_ascii(path);
64+
printf("i/%-5s w/%-5s attr/%-17s\t", i_txt, w_txt, a_txt);
65+
}
66+
}
67+
5068
static void write_name(const char *name)
5169
{
5270
/*
@@ -68,6 +86,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
6886
return;
6987

7088
fputs(tag, stdout);
89+
write_eolinfo(NULL, ent->name);
7190
write_name(ent->name);
7291
}
7392

@@ -170,6 +189,7 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
170189
find_unique_abbrev(ce->sha1,abbrev),
171190
ce_stage(ce));
172191
}
192+
write_eolinfo(ce, ce->name);
173193
write_name(ce->name);
174194
if (debug_mode) {
175195
const struct stat_data *sd = &ce->ce_stat_data;
@@ -433,6 +453,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
433453
OPT_BIT(0, "directory", &dir.flags,
434454
N_("show 'other' directories' names only"),
435455
DIR_SHOW_OTHER_DIRECTORIES),
456+
OPT_BOOL(0, "eol", &show_eol, N_("show line endings of files")),
436457
OPT_NEGBIT(0, "empty-directory", &dir.flags,
437458
N_("don't show empty directories"),
438459
DIR_HIDE_EMPTY_DIRECTORIES),

convert.c

Lines changed: 91 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313
* translation when the "text" attribute or "auto_crlf" option is set.
1414
*/
1515

16+
/* Stat bits: When BIN is set, the txt bits are unset */
17+
#define CONVERT_STAT_BITS_TXT_LF 0x1
18+
#define CONVERT_STAT_BITS_TXT_CRLF 0x2
19+
#define CONVERT_STAT_BITS_BIN 0x4
20+
1621
enum crlf_action {
1722
CRLF_GUESS = -1,
1823
CRLF_BINARY = 0,
@@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
7580

7681
/*
7782
* The same heuristics as diff.c::mmfile_is_binary()
83+
* We treat files with bare CR as binary
7884
*/
79-
static int is_binary(unsigned long size, struct text_stat *stats)
85+
static int convert_is_binary(unsigned long size, const struct text_stat *stats)
8086
{
81-
87+
if (stats->cr != stats->crlf)
88+
return 1;
8289
if (stats->nul)
8390
return 1;
8491
if ((stats->printable >> 7) < stats->nonprintable)
8592
return 1;
86-
/*
87-
* Other heuristics? Average line length might be relevant,
88-
* as might LF vs CR vs CRLF counts..
89-
*
90-
* NOTE! It might be normal to have a low ratio of CRLF to LF
91-
* (somebody starts with a LF-only file and edits it with an editor
92-
* that adds CRLF only to lines that are added..). But do we
93-
* want to support CR-only? Probably not.
94-
*/
9593
return 0;
9694
}
9795

96+
static unsigned int gather_convert_stats(const char *data, unsigned long size)
97+
{
98+
struct text_stat stats;
99+
if (!data || !size)
100+
return 0;
101+
gather_stats(data, size, &stats);
102+
if (convert_is_binary(size, &stats))
103+
return CONVERT_STAT_BITS_BIN;
104+
else if (stats.crlf && stats.crlf == stats.lf)
105+
return CONVERT_STAT_BITS_TXT_CRLF;
106+
else if (stats.crlf && stats.lf)
107+
return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF;
108+
else if (stats.lf)
109+
return CONVERT_STAT_BITS_TXT_LF;
110+
else
111+
return 0;
112+
}
113+
114+
static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
115+
{
116+
unsigned int convert_stats = gather_convert_stats(data, size);
117+
118+
if (convert_stats & CONVERT_STAT_BITS_BIN)
119+
return "-text";
120+
switch (convert_stats) {
121+
case CONVERT_STAT_BITS_TXT_LF:
122+
return "lf";
123+
case CONVERT_STAT_BITS_TXT_CRLF:
124+
return "crlf";
125+
case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF:
126+
return "mixed";
127+
default:
128+
return "none";
129+
}
130+
}
131+
132+
/*
 * Return the <eolinfo> string for the blob that
 * read_blob_data_from_cache() yields for "path".
 *
 * The result is whatever gather_convert_stats_ascii() reports for that
 * data; it is not allocated here and must not be freed by the caller.
 */
const char *get_cached_convert_stats_ascii(const char *path)
{
	const char *ret;
	/*
	 * Start from a defined value: sz is handed on by value even when
	 * the read returns NULL, so do not rely on the reader having
	 * stored a size in that case.
	 */
	unsigned long sz = 0;
	void *data = read_blob_data_from_cache(path, &sz);

	ret = gather_convert_stats_ascii(data, sz);
	free(data);
	return ret;
}
141+
142+
const char *get_wt_convert_stats_ascii(const char *path)
143+
{
144+
const char *ret = "";
145+
struct strbuf sb = STRBUF_INIT;
146+
if (strbuf_read_file(&sb, path, 0) >= 0)
147+
ret = gather_convert_stats_ascii(sb.buf, sb.len);
148+
strbuf_release(&sb);
149+
return ret;
150+
}
151+
98152
static enum eol output_eol(enum crlf_action crlf_action)
99153
{
100154
switch (crlf_action) {
@@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
187241
gather_stats(src, len, &stats);
188242

189243
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
190-
/*
191-
* We're currently not going to even try to convert stuff
192-
* that has bare CR characters. Does anybody do that crazy
193-
* stuff?
194-
*/
195-
if (stats.cr != stats.crlf)
196-
return 0;
197-
198-
/*
199-
* And add some heuristics for binary vs text, of course...
200-
*/
201-
if (is_binary(len, &stats))
244+
if (convert_is_binary(len, &stats))
202245
return 0;
203246

204247
if (crlf_action == CRLF_GUESS) {
@@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
277320
return 0;
278321
}
279322

280-
/* If we have any bare CR characters, we're not going to touch it */
281-
if (stats.cr != stats.crlf)
282-
return 0;
283-
284-
if (is_binary(len, &stats))
323+
if (convert_is_binary(len, &stats))
285324
return 0;
286325
}
287326

@@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
777816
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
778817
}
779818

819+
const char *get_convert_attr_ascii(const char *path)
820+
{
821+
struct conv_attrs ca;
822+
enum crlf_action crlf_action;
823+
824+
convert_attrs(&ca, path);
825+
crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
826+
switch (crlf_action) {
827+
case CRLF_GUESS:
828+
return "";
829+
case CRLF_BINARY:
830+
return "-text";
831+
case CRLF_TEXT:
832+
return "text";
833+
case CRLF_INPUT:
834+
return "text eol=lf";
835+
case CRLF_CRLF:
836+
return "text=auto eol=crlf";
837+
case CRLF_AUTO:
838+
return "text=auto";
839+
}
840+
return "";
841+
}
842+
780843
int convert_to_git(const char *path, const char *src, size_t len,
781844
struct strbuf *dst, enum safe_crlf checksafe)
782845
{

convert.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ enum eol {
3232
};
3333

3434
extern enum eol core_eol;
35+
extern const char *get_cached_convert_stats_ascii(const char *path);
36+
extern const char *get_wt_convert_stats_ascii(const char *path);
37+
extern const char *get_convert_attr_ascii(const char *path);
3538

3639
/* returns 1 if *dst was used */
3740
extern int convert_to_git(const char *path, const char *src, size_t len,

0 commit comments

Comments
 (0)