|
13 | 13 | * translation when the "text" attribute or "auto_crlf" option is set.
|
14 | 14 | */
|
15 | 15 |
|
/*
 * Flags describing the line endings found in a buffer.
 * The two text flags may be combined with each other; the binary flag
 * is reported alone, never together with a text flag.
 */
#define CONVERT_STAT_BITS_TXT_LF   (1 << 0)
#define CONVERT_STAT_BITS_TXT_CRLF (1 << 1)
#define CONVERT_STAT_BITS_BIN      (1 << 2)
| 20 | + |
16 | 21 | enum crlf_action {
|
17 | 22 | CRLF_GUESS = -1,
|
18 | 23 | CRLF_BINARY = 0,
|
@@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
|
75 | 80 |
|
76 | 81 | /*
|
77 | 82 | * The same heuristics as diff.c::mmfile_is_binary()
|
| 83 | + * We treat files with bare CR as binary |
78 | 84 | */
|
79 |
| -static int is_binary(unsigned long size, struct text_stat *stats) |
| 85 | +static int convert_is_binary(unsigned long size, const struct text_stat *stats) |
80 | 86 | {
|
81 |
| - |
| 87 | + if (stats->cr != stats->crlf) |
| 88 | + return 1; |
82 | 89 | if (stats->nul)
|
83 | 90 | return 1;
|
84 | 91 | if ((stats->printable >> 7) < stats->nonprintable)
|
85 | 92 | return 1;
|
86 |
| - /* |
87 |
| - * Other heuristics? Average line length might be relevant, |
88 |
| - * as might LF vs CR vs CRLF counts.. |
89 |
| - * |
90 |
| - * NOTE! It might be normal to have a low ratio of CRLF to LF |
91 |
| - * (somebody starts with a LF-only file and edits it with an editor |
92 |
| - * that adds CRLF only to lines that are added..). But do we |
93 |
| - * want to support CR-only? Probably not. |
94 |
| - */ |
95 | 93 | return 0;
|
96 | 94 | }
|
97 | 95 |
|
| 96 | +static unsigned int gather_convert_stats(const char *data, unsigned long size) |
| 97 | +{ |
| 98 | + struct text_stat stats; |
| 99 | + if (!data || !size) |
| 100 | + return 0; |
| 101 | + gather_stats(data, size, &stats); |
| 102 | + if (convert_is_binary(size, &stats)) |
| 103 | + return CONVERT_STAT_BITS_BIN; |
| 104 | + else if (stats.crlf && stats.crlf == stats.lf) |
| 105 | + return CONVERT_STAT_BITS_TXT_CRLF; |
| 106 | + else if (stats.crlf && stats.lf) |
| 107 | + return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF; |
| 108 | + else if (stats.lf) |
| 109 | + return CONVERT_STAT_BITS_TXT_LF; |
| 110 | + else |
| 111 | + return 0; |
| 112 | +} |
| 113 | + |
| 114 | +static const char *gather_convert_stats_ascii(const char *data, unsigned long size) |
| 115 | +{ |
| 116 | + unsigned int convert_stats = gather_convert_stats(data, size); |
| 117 | + |
| 118 | + if (convert_stats & CONVERT_STAT_BITS_BIN) |
| 119 | + return "-text"; |
| 120 | + switch (convert_stats) { |
| 121 | + case CONVERT_STAT_BITS_TXT_LF: |
| 122 | + return "lf"; |
| 123 | + case CONVERT_STAT_BITS_TXT_CRLF: |
| 124 | + return "crlf"; |
| 125 | + case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF: |
| 126 | + return "mixed"; |
| 127 | + default: |
| 128 | + return "none"; |
| 129 | + } |
| 130 | +} |
| 131 | + |
/*
 * Describe the line endings of the cached blob for "path", using the
 * same tokens as gather_convert_stats_ascii().
 *
 * The returned pointer refers to a string literal and must not be freed.
 */
const char *get_cached_convert_stats_ascii(const char *path)
{
	/*
	 * Start from 0 so that no indeterminate value is handed on when
	 * read_blob_data_from_cache() yields no data (presumably it then
	 * returns NULL and may leave the size untouched -- confirm).
	 */
	unsigned long sz = 0;
	void *data = read_blob_data_from_cache(path, &sz);
	const char *ret = gather_convert_stats_ascii(data, sz);

	free(data);
	return ret;
}
| 141 | + |
| 142 | +const char *get_wt_convert_stats_ascii(const char *path) |
| 143 | +{ |
| 144 | + const char *ret = ""; |
| 145 | + struct strbuf sb = STRBUF_INIT; |
| 146 | + if (strbuf_read_file(&sb, path, 0) >= 0) |
| 147 | + ret = gather_convert_stats_ascii(sb.buf, sb.len); |
| 148 | + strbuf_release(&sb); |
| 149 | + return ret; |
| 150 | +} |
| 151 | + |
98 | 152 | static enum eol output_eol(enum crlf_action crlf_action)
|
99 | 153 | {
|
100 | 154 | switch (crlf_action) {
|
@@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
|
187 | 241 | gather_stats(src, len, &stats);
|
188 | 242 |
|
189 | 243 | if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
|
190 |
| - /* |
191 |
| - * We're currently not going to even try to convert stuff |
192 |
| - * that has bare CR characters. Does anybody do that crazy |
193 |
| - * stuff? |
194 |
| - */ |
195 |
| - if (stats.cr != stats.crlf) |
196 |
| - return 0; |
197 |
| - |
198 |
| - /* |
199 |
| - * And add some heuristics for binary vs text, of course... |
200 |
| - */ |
201 |
| - if (is_binary(len, &stats)) |
| 244 | + if (convert_is_binary(len, &stats)) |
202 | 245 | return 0;
|
203 | 246 |
|
204 | 247 | if (crlf_action == CRLF_GUESS) {
|
@@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
|
277 | 320 | return 0;
|
278 | 321 | }
|
279 | 322 |
|
280 |
| - /* If we have any bare CR characters, we're not going to touch it */ |
281 |
| - if (stats.cr != stats.crlf) |
282 |
| - return 0; |
283 |
| - |
284 |
| - if (is_binary(len, &stats)) |
| 323 | + if (convert_is_binary(len, &stats)) |
285 | 324 | return 0;
|
286 | 325 | }
|
287 | 326 |
|
@@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
|
777 | 816 | return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
|
778 | 817 | }
|
779 | 818 |
|
| 819 | +const char *get_convert_attr_ascii(const char *path) |
| 820 | +{ |
| 821 | + struct conv_attrs ca; |
| 822 | + enum crlf_action crlf_action; |
| 823 | + |
| 824 | + convert_attrs(&ca, path); |
| 825 | + crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); |
| 826 | + switch (crlf_action) { |
| 827 | + case CRLF_GUESS: |
| 828 | + return ""; |
| 829 | + case CRLF_BINARY: |
| 830 | + return "-text"; |
| 831 | + case CRLF_TEXT: |
| 832 | + return "text"; |
| 833 | + case CRLF_INPUT: |
| 834 | + return "text eol=lf"; |
| 835 | + case CRLF_CRLF: |
| 836 | + return "text=auto eol=crlf"; |
| 837 | + case CRLF_AUTO: |
| 838 | + return "text=auto"; |
| 839 | + } |
| 840 | + return ""; |
| 841 | +} |
| 842 | + |
780 | 843 | int convert_to_git(const char *path, const char *src, size_t len,
|
781 | 844 | struct strbuf *dst, enum safe_crlf checksafe)
|
782 | 845 | {
|
|
0 commit comments