@@ -17,6 +17,7 @@ static int drivers_alloc;
1717 .cflags = REG_EXTENDED, \
1818 }, \
1919 .word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
20+ .word_regex_multi_byte = wrx "|[^[:space:]]", \
2021}
2122#define IPATTERN (lang , rx , wrx ) { \
2223 .name = lang, \
@@ -26,6 +27,7 @@ static int drivers_alloc;
2627 .cflags = REG_EXTENDED | REG_ICASE, \
2728 }, \
2829 .word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
30+ .word_regex_multi_byte = wrx "|[^[:space:]]", \
2931}
3032
3133/*
@@ -294,7 +296,7 @@ PATTERNS("scheme",
294296 /* All other words should be delimited by spaces or parentheses */
295297 "|([^][)(}{[ \t])+" ),
296298PATTERNS ("tex" , "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$" ,
297- "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff] +" ),
299+ "\\\\[a-zA-Z@]+|\\\\.|( [a-zA-Z0-9]|[^\x01-\x7f]) +" ),
298300{ "default" , NULL , NULL , -1 , { NULL , 0 } },
299301};
300302#undef PATTERNS
@@ -330,6 +332,25 @@ static int userdiff_find_by_namelen_cb(struct userdiff_driver *driver,
330332 return 0 ;
331333}
332334
/*
 * Report whether regexec() matches a whole UTF-8 multi-byte sequence
 * with a single "[^[:space:]]" bracket expression.
 *
 * The probe compiles that expression, runs it against the two-byte
 * string "\xc2\xa3" and checks that the match spans the entire string
 * (offsets 0 through its length).  The answer is kept in a function-local
 * static, so the regex is compiled and executed only on the first call.
 *
 * Returns 1 when the whole sequence was matched as one unit, 0 otherwise.
 * A failure to compile the probe expression is reported through BUG().
 */
static int regexec_supports_multi_byte_chars(void)
{
	static const char pattern[] = "[^[:space:]]";
	static const char sample[] = "\xc2\xa3";
	static int cached = -1;
	regex_t preg;
	regmatch_t m;

	if (cached == -1) {
		if (regcomp(&preg, pattern, REG_EXTENDED))
			BUG("invalid regular expression: %s", pattern);

		/* Supported only if one match covers every byte of the sample. */
		cached = 0;
		if (!regexec(&preg, sample, 1, &m, 0) &&
		    m.rm_so == 0 &&
		    m.rm_eo == strlen(sample))
			cached = 1;

		regfree(&preg);
	}
	return cached;
}
353+
333354static struct userdiff_driver * userdiff_find_by_namelen (const char * name , size_t len )
334355{
335356 struct find_by_namelen_data udcbdata = {
@@ -405,7 +426,13 @@ int userdiff_config(const char *k, const char *v)
405426struct userdiff_driver * userdiff_find_by_name (const char * name )
406427{
407428 int len = strlen (name );
408- return userdiff_find_by_namelen (name , len );
429+ struct userdiff_driver * driver = userdiff_find_by_namelen (name , len );
430+ if (driver && driver -> word_regex_multi_byte ) {
431+ if (regexec_supports_multi_byte_chars ())
432+ driver -> word_regex = driver -> word_regex_multi_byte ;
433+ driver -> word_regex_multi_byte = NULL ;
434+ }
435+ return driver ;
409436}
410437
411438struct userdiff_driver * userdiff_find_by_path (struct index_state * istate ,
0 commit comments