@@ -17,6 +17,7 @@ static int drivers_alloc;
17
17
.cflags = REG_EXTENDED, \
18
18
}, \
19
19
.word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
20
+ .word_regex_multi_byte = wrx "|[^[:space:]]", \
20
21
}
21
22
#define IPATTERN (lang , rx , wrx ) { \
22
23
.name = lang, \
@@ -26,6 +27,7 @@ static int drivers_alloc;
26
27
.cflags = REG_EXTENDED | REG_ICASE, \
27
28
}, \
28
29
.word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
30
+ .word_regex_multi_byte = wrx "|[^[:space:]]", \
29
31
}
30
32
31
33
/*
@@ -294,7 +296,7 @@ PATTERNS("scheme",
294
296
/* All other words should be delimited by spaces or parentheses */
295
297
"|([^][)(}{[ \t])+" ),
296
298
PATTERNS ("tex" , "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$" ,
297
- "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff] +" ),
299
+ "\\\\[a-zA-Z@]+|\\\\.|( [a-zA-Z0-9]|[^\x01-\x7f]) +" ),
298
300
{ "default" , NULL , NULL , -1 , { NULL , 0 } },
299
301
};
300
302
#undef PATTERNS
@@ -330,6 +332,25 @@ static int userdiff_find_by_namelen_cb(struct userdiff_driver *driver,
330
332
return 0 ;
331
333
}
332
334
335
/*
 * Report whether regexec() matches a two-byte UTF-8 sequence as a single
 * character.
 *
 * The probe compiles "[^[:space:]]" and runs it against the bytes
 * 0xC2 0xA3 (U+00A3 encoded as UTF-8).  The answer is 1 only when the
 * match starts at offset 0 and ends after both bytes; otherwise it is 0.
 * The outcome is stored in a function-local static, so the regex is
 * compiled and executed only on the first call.
 */
static int regexec_supports_multi_byte_chars(void)
{
	static const char pattern[] = "[^[:space:]]";
	static const char sample[] = "\xc2\xa3";
	static int cached = -1;
	regex_t probe;
	regmatch_t hit;

	if (cached == -1) {
		if (regcomp(&probe, pattern, REG_EXTENDED))
			BUG("invalid regular expression: %s", pattern);

		/* Assume no support until the probe proves otherwise. */
		cached = 0;
		if (!regexec(&probe, sample, 1, &hit, 0) &&
		    hit.rm_so == 0 &&
		    hit.rm_eo == strlen(sample))
			cached = 1;

		regfree(&probe);
	}
	return cached;
}
353
+
333
354
static struct userdiff_driver * userdiff_find_by_namelen (const char * name , size_t len )
334
355
{
335
356
struct find_by_namelen_data udcbdata = {
@@ -405,7 +426,13 @@ int userdiff_config(const char *k, const char *v)
405
426
struct userdiff_driver * userdiff_find_by_name (const char * name )
406
427
{
407
428
int len = strlen (name );
408
- return userdiff_find_by_namelen (name , len );
429
+ struct userdiff_driver * driver = userdiff_find_by_namelen (name , len );
430
+ if (driver && driver -> word_regex_multi_byte ) {
431
+ if (regexec_supports_multi_byte_chars ())
432
+ driver -> word_regex = driver -> word_regex_multi_byte ;
433
+ driver -> word_regex_multi_byte = NULL ;
434
+ }
435
+ return driver ;
409
436
}
410
437
411
438
struct userdiff_driver * userdiff_find_by_path (struct index_state * istate ,
0 commit comments