@@ -266,6 +266,64 @@ static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
266
266
267
267
}
268
268
269
/*
 * Check `data` for byte order mark (BOM) problems that can be detected
 * for the working-tree encoding `enc`.
 *
 * Only encodings whose name starts with "UTF" (case-insensitively) are
 * inspected. If has_prohibited_utf_bom() or is_missing_required_utf_bom()
 * flags the content, an advice message naming a more suitable encoding
 * is shown for `path`, and then:
 *   - if `die_on_error` is non-zero, die() is called with the error;
 *   - otherwise the result of error() is returned.
 *
 * Returns 0 when no problem was detected.
 */
static int validate_encoding(const char *path, const char *enc,
			     const char *data, size_t len, int die_on_error)
{
	/* We only check for UTF here as UTF?? can be an alias for UTF-?? */
	if (istarts_with(enc, "UTF")) {
		/*
		 * Check for detectable errors in UTF encodings
		 */
		if (has_prohibited_utf_bom(enc, data, len)) {
			const char *error_msg = _(
				"BOM is prohibited in '%s' if encoded as %s");
			/*
			 * This advice is shown for UTF-??BE and UTF-??LE encodings.
			 * We cut off the last two characters of the encoding name
			 * to generate the encoding name suitable for BOMs.
			 */
			const char *advise_msg = _(
				"The file '%s' contains a byte order "
				"mark (BOM). Please use UTF-%s as "
				"working-tree-encoding.");
			char *upper = xstrdup_toupper(enc);
			/*
			 * Start from the whole name so that `stripped` is never
			 * NULL when it is handed to the "%s" conversion below.
			 */
			const char *stripped = upper;

			/* enc starts with "UTF", so strlen(upper) >= 3 here. */
			upper[strlen(upper) - 2] = '\0';
			/*
			 * Both prefix checks must look at `upper`: the name may
			 * be spelled without the dash (e.g. "UTF16LE"), in which
			 * case the first check fails and leaves `stripped` as is.
			 */
			if (!skip_prefix(upper, "UTF-", &stripped))
				skip_prefix(upper, "UTF", &stripped);
			advise(advise_msg, path, stripped);
			free(upper);
			if (die_on_error)
				die(error_msg, path, enc);
			else {
				return error(error_msg, path, enc);
			}

		} else if (is_missing_required_utf_bom(enc, data, len)) {
			const char *error_msg = _(
				"BOM is required in '%s' if encoded as %s");
			const char *advise_msg = _(
				"The file '%s' is missing a byte order "
				"mark (BOM). Please use UTF-%sBE or UTF-%sLE "
				"(depending on the byte order) as "
				"working-tree-encoding.");
			char *upper = xstrdup_toupper(enc);
			/* Never NULL, see the comment in the branch above. */
			const char *stripped = upper;

			/* Handle both the "UTF-16" and the "UTF16" spelling. */
			if (!skip_prefix(upper, "UTF-", &stripped))
				skip_prefix(upper, "UTF", &stripped);
			advise(advise_msg, path, stripped, stripped);
			free(upper);
			if (die_on_error)
				die(error_msg, path, enc);
			else {
				return error(error_msg, path, enc);
			}
		}

	}
	return 0;
}
326
+
269
327
/*
 * Encoding name handed to reencode_string_len() together with the
 * working-tree encoding when encode_to_git() converts file content.
 */
static const char * default_encoding = "UTF-8" ;
270
328
271
329
static int encode_to_git (const char * path , const char * src , size_t src_len ,
@@ -291,6 +349,9 @@ static int encode_to_git(const char *path, const char *src, size_t src_len,
291
349
if (!buf && !src )
292
350
return 1 ;
293
351
352
+ if (validate_encoding (path , enc , src , src_len , die_on_error ))
353
+ return 0 ;
354
+
294
355
dst = reencode_string_len (src , src_len , default_encoding , enc ,
295
356
& dst_len );
296
357
if (!dst ) {
0 commit comments