7
7
#include "sigchain.h"
8
8
#include "pkt-line.h"
9
9
#include "sub-process.h"
10
+ #include "utf8.h"
10
11
11
12
/*
12
13
* convert.c - convert a file when checking it out and checking it in.
@@ -265,6 +266,78 @@ static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
265
266
266
267
}
267
268
269
+ static const char * default_encoding = "UTF-8" ;
270
+
271
+ static int encode_to_git (const char * path , const char * src , size_t src_len ,
272
+ struct strbuf * buf , const char * enc , int conv_flags )
273
+ {
274
+ char * dst ;
275
+ int dst_len ;
276
+ int die_on_error = conv_flags & CONV_WRITE_OBJECT ;
277
+
278
+ /*
279
+ * No encoding is specified or there is nothing to encode.
280
+ * Tell the caller that the content was not modified.
281
+ */
282
+ if (!enc || (src && !src_len ))
283
+ return 0 ;
284
+
285
+ /*
286
+ * Looks like we got called from "would_convert_to_git()".
287
+ * This means Git wants to know if it would encode (= modify!)
288
+ * the content. Let's answer with "yes", since an encoding was
289
+ * specified.
290
+ */
291
+ if (!buf && !src )
292
+ return 1 ;
293
+
294
+ dst = reencode_string_len (src , src_len , default_encoding , enc ,
295
+ & dst_len );
296
+ if (!dst ) {
297
+ /*
298
+ * We could add the blob "as-is" to Git. However, on checkout
299
+ * we would try to reencode to the original encoding. This
300
+ * would fail and we would leave the user with a messed-up
301
+ * working tree. Let's try to avoid this by screaming loud.
302
+ */
303
+ const char * msg = _ ("failed to encode '%s' from %s to %s" );
304
+ if (die_on_error )
305
+ die (msg , path , enc , default_encoding );
306
+ else {
307
+ error (msg , path , enc , default_encoding );
308
+ return 0 ;
309
+ }
310
+ }
311
+
312
+ strbuf_attach (buf , dst , dst_len , dst_len + 1 );
313
+ return 1 ;
314
+ }
315
+
316
+ static int encode_to_worktree (const char * path , const char * src , size_t src_len ,
317
+ struct strbuf * buf , const char * enc )
318
+ {
319
+ char * dst ;
320
+ int dst_len ;
321
+
322
+ /*
323
+ * No encoding is specified or there is nothing to encode.
324
+ * Tell the caller that the content was not modified.
325
+ */
326
+ if (!enc || (src && !src_len ))
327
+ return 0 ;
328
+
329
+ dst = reencode_string_len (src , src_len , enc , default_encoding ,
330
+ & dst_len );
331
+ if (!dst ) {
332
+ error ("failed to encode '%s' from %s to %s" ,
333
+ path , default_encoding , enc );
334
+ return 0 ;
335
+ }
336
+
337
+ strbuf_attach (buf , dst , dst_len , dst_len + 1 );
338
+ return 1 ;
339
+ }
340
+
268
341
static int crlf_to_git (const struct index_state * istate ,
269
342
const char * path , const char * src , size_t len ,
270
343
struct strbuf * buf ,
@@ -978,6 +1051,24 @@ static int ident_to_worktree(const char *path, const char *src, size_t len,
978
1051
return 1 ;
979
1052
}
980
1053
1054
+ static const char * git_path_check_encoding (struct attr_check_item * check )
1055
+ {
1056
+ const char * value = check -> value ;
1057
+
1058
+ if (ATTR_UNSET (value ) || !strlen (value ))
1059
+ return NULL ;
1060
+
1061
+ if (ATTR_TRUE (value ) || ATTR_FALSE (value )) {
1062
+ die (_ ("true/false are no valid working-tree-encodings" ));
1063
+ }
1064
+
1065
+ /* Don't encode to the default encoding */
1066
+ if (same_encoding (value , default_encoding ))
1067
+ return NULL ;
1068
+
1069
+ return value ;
1070
+ }
1071
+
981
1072
static enum crlf_action git_path_check_crlf (struct attr_check_item * check )
982
1073
{
983
1074
const char * value = check -> value ;
@@ -1033,6 +1124,7 @@ struct conv_attrs {
1033
1124
enum crlf_action attr_action ; /* What attr says */
1034
1125
enum crlf_action crlf_action ; /* When no attr is set, use core.autocrlf */
1035
1126
int ident ;
1127
+ const char * working_tree_encoding ; /* Supported encoding or default encoding if NULL */
1036
1128
};
1037
1129
1038
1130
static void convert_attrs (struct conv_attrs * ca , const char * path )
@@ -1041,7 +1133,8 @@ static void convert_attrs(struct conv_attrs *ca, const char *path)
1041
1133
1042
1134
if (!check ) {
1043
1135
check = attr_check_initl ("crlf" , "ident" , "filter" ,
1044
- "eol" , "text" , NULL );
1136
+ "eol" , "text" , "working-tree-encoding" ,
1137
+ NULL );
1045
1138
user_convert_tail = & user_convert ;
1046
1139
git_config (read_convert_config , NULL );
1047
1140
}
@@ -1064,6 +1157,7 @@ static void convert_attrs(struct conv_attrs *ca, const char *path)
1064
1157
else if (eol_attr == EOL_CRLF )
1065
1158
ca -> crlf_action = CRLF_TEXT_CRLF ;
1066
1159
}
1160
+ ca -> working_tree_encoding = git_path_check_encoding (ccheck + 5 );
1067
1161
} else {
1068
1162
ca -> drv = NULL ;
1069
1163
ca -> crlf_action = CRLF_UNDEFINED ;
@@ -1144,6 +1238,13 @@ int convert_to_git(const struct index_state *istate,
1144
1238
src = dst -> buf ;
1145
1239
len = dst -> len ;
1146
1240
}
1241
+
1242
+ ret |= encode_to_git (path , src , len , dst , ca .working_tree_encoding , conv_flags );
1243
+ if (ret && dst ) {
1244
+ src = dst -> buf ;
1245
+ len = dst -> len ;
1246
+ }
1247
+
1147
1248
if (!(conv_flags & CONV_EOL_KEEP_CRLF )) {
1148
1249
ret |= crlf_to_git (istate , path , src , len , dst , ca .crlf_action , conv_flags );
1149
1250
if (ret && dst ) {
@@ -1167,6 +1268,7 @@ void convert_to_git_filter_fd(const struct index_state *istate,
1167
1268
if (!apply_filter (path , NULL , 0 , fd , dst , ca .drv , CAP_CLEAN , NULL ))
1168
1269
die ("%s: clean filter '%s' failed" , path , ca .drv -> name );
1169
1270
1271
+ encode_to_git (path , dst -> buf , dst -> len , dst , ca .working_tree_encoding , conv_flags );
1170
1272
crlf_to_git (istate , path , dst -> buf , dst -> len , dst , ca .crlf_action , conv_flags );
1171
1273
ident_to_git (path , dst -> buf , dst -> len , dst , ca .ident );
1172
1274
}
@@ -1198,6 +1300,12 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
1198
1300
}
1199
1301
}
1200
1302
1303
+ ret |= encode_to_worktree (path , src , len , dst , ca .working_tree_encoding );
1304
+ if (ret ) {
1305
+ src = dst -> buf ;
1306
+ len = dst -> len ;
1307
+ }
1308
+
1201
1309
ret_filter = apply_filter (
1202
1310
path , src , len , -1 , dst , ca .drv , CAP_SMUDGE , dco );
1203
1311
if (!ret_filter && ca .drv && ca .drv -> required )
@@ -1664,6 +1772,9 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s
1664
1772
if (ca .drv && (ca .drv -> process || ca .drv -> smudge || ca .drv -> clean ))
1665
1773
return NULL ;
1666
1774
1775
+ if (ca .working_tree_encoding )
1776
+ return NULL ;
1777
+
1667
1778
if (ca .crlf_action == CRLF_AUTO || ca .crlf_action == CRLF_AUTO_CRLF )
1668
1779
return NULL ;
1669
1780
0 commit comments