Skip to content

Commit 1819ad3

Browse files
dspinellis authored and gitster committed
grep: fix multibyte regex handling under macOS
Commit 29de205 (Makefile: fix default regex settings on Darwin, 2013-05-11) fixed t0070-fundamental.sh under Darwin (macOS) by adopting Git's regex library. However, this library is compiled with NO_MBSUPPORT, which causes git-grep to work incorrectly on multibyte (e.g. UTF-8) files. Current macOS versions pass t0070-fundamental.sh with the native macOS regex library, which also supports multibyte characters. Adjust the Makefile to use the native regex library, and call setlocale(3) to set CTYPE according to the user's preference. The setlocale call is required on all platforms, but on platforms supporting gettext(3), setlocale was called as a side-effect of initializing gettext. Therefore, move the CTYPE setlocale call from gettext.c to common-main.c and the corresponding locale.h include into git-compat-util.h. Thanks to the global initialization of CTYPE setlocale, the test-tool regex command now works correctly with supported multibyte regexes, and is used to set the MB_REGEX test prerequisite by assessing a platform's support for them. Signed-off-by: Diomidis Spinellis <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 795ea87 commit 1819ad3

File tree

5 files changed

+18
-3
lines changed

5 files changed

+18
-3
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1434,7 +1434,6 @@ ifeq ($(uname_S),Darwin)
14341434
APPLE_COMMON_CRYPTO = YesPlease
14351435
COMPAT_CFLAGS += -DAPPLE_COMMON_CRYPTO
14361436
endif
1437-
NO_REGEX = YesPlease
14381437
PTHREAD_LIBS =
14391438
endif
14401439

@@ -2979,6 +2978,7 @@ GIT-BUILD-OPTIONS: FORCE
29792978
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+
29802979
@echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+
29812980
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+
2981+
@echo NO_REGEX=\''$(subst ','\'',$(subst ','\'',$(NO_REGEX)))'\' >>$@+
29822982
@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+
29832983
@echo PAGER_ENV=\''$(subst ','\'',$(subst ','\'',$(PAGER_ENV)))'\' >>$@+
29842984
@echo DC_SHA1=\''$(subst ','\'',$(subst ','\'',$(DC_SHA1)))'\' >>$@+

common-main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ int main(int argc, const char **argv)
4040

4141
git_resolve_executable_dir(argv[0]);
4242

43+
setlocale(LC_CTYPE, "");
4344
git_setup_gettext();
4445

4546
initialize_the_repository();

gettext.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include "config.h"
1111

1212
#ifndef NO_GETTEXT
13-
# include <locale.h>
1413
# include <libintl.h>
1514
# ifdef GIT_WINDOWS_NATIVE
1615

@@ -80,7 +79,6 @@ static int test_vsnprintf(const char *fmt, ...)
8079

8180
static void init_gettext_charset(const char *domain)
8281
{
83-
setlocale(LC_CTYPE, "");
8482
charset = locale_charset();
8583
bind_textdomain_codeset(domain, charset);
8684

git-compat-util.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ struct strbuf;
215215
#endif
216216
#include <errno.h>
217217
#include <limits.h>
218+
#include <locale.h>
218219
#ifdef NEEDS_SYS_PARAM_H
219220
#include <sys/param.h>
220221
#endif

t/t7810-grep.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ test_invalid_grep_expression() {
1818
'
1919
}
2020

21+
LC_ALL=en_US.UTF-8 test-tool regex '^.$' '¿' &&
22+
test_set_prereq MB_REGEX
23+
2124
cat >hello.c <<EOF
2225
#include <assert.h>
2326
#include <stdio.h>
@@ -88,6 +91,10 @@ test_expect_success setup '
8891
echo unusual >"\"unusual\" pathname" &&
8992
echo unusual >"t/nested \"unusual\" pathname"
9093
fi &&
94+
if test_have_prereq MB_REGEX
95+
then
96+
echo "¿" >reverse-question-mark
97+
fi &&
9198
git add . &&
9299
test_tick &&
93100
git commit -m initial
@@ -569,6 +576,14 @@ do
569576
'
570577
done
571578

579+
test_expect_success MB_REGEX 'grep exactly one char in single-char multibyte file' '
580+
LC_ALL=en_US.UTF-8 git grep "^.$" reverse-question-mark
581+
'
582+
583+
test_expect_success MB_REGEX 'grep two chars in single-char multibyte file' '
584+
LC_ALL=en_US.UTF-8 test_expect_code 1 git grep ".." reverse-question-mark
585+
'
586+
572587
cat >expected <<EOF
573588
file
574589
EOF

0 commit comments

Comments
 (0)