@@ -87,88 +87,24 @@ static int test_vsnprintf(const char *fmt, ...)
87
87
88
88
static void init_gettext_charset (const char * domain )
89
89
{
90
- /*
91
- This trick arranges for messages to be emitted in the user's
92
- requested encoding, but avoids setting LC_CTYPE from the
93
- environment for the whole program.
94
-
95
- This is primarily done to avoid a bug in vsnprintf in the GNU C
96
- Library [1], which triggered a "your vsnprintf is broken" error
97
- on Git's own repository when inspecting v0.99.6~1 under a UTF-8
98
- locale.
99
-
100
- That commit contains an ISO-8859-1 encoded author name, which
101
- the locale aware vsnprintf(3) won't interpolate in the format
102
- argument, due to mismatch between the data encoding and the
103
- locale.
104
-
105
- Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at
106
- this point, because it'd require auditing all the code that uses C
107
- functions whose semantics are modified by LC_CTYPE.
108
-
109
- But only setting LC_MESSAGES as we do creates a problem, since
110
- we declare the encoding of our PO files[2], the gettext
111
- implementation will try to recode it to the user's locale, but
112
- without LC_CTYPE it'll emit something like this on 'git init'
113
- under the Icelandic locale:
114
-
115
- Bj? til t?ma Git lind ? /hlagh/.git/
116
-
117
- Gettext knows about the encoding of our PO file, but we haven't
118
- told it about the user's encoding, so all the non-US-ASCII
119
- characters get encoded to question marks.
120
-
121
- But we're in luck! We can set LC_CTYPE from the environment
122
- only while we call nl_langinfo and
123
- bind_textdomain_codeset. That suffices to tell gettext what
124
- encoding it should emit in, so it'll now say:
125
-
126
- Bjó til tóma Git lind í /hlagh/.git/
127
-
128
- And the equivalent ISO-8859-1 string will be emitted under an
129
- ISO-8859-1 locale.
130
-
131
- This way we get the advantages of setting LC_CTYPE
132
- (talk to the user in his language/encoding), without the major
133
- drawbacks (changed semantics for C functions we rely on).
134
-
135
- However foreign functions using other message catalogs that
136
- aren't using our neat trick will still have a problem, e.g. if
137
- we have to call perror(3):
138
-
139
- #include <stdio.h>
140
- #include <locale.h>
141
- #include <errno.h>
142
-
143
- int main(void)
144
- {
145
- setlocale(LC_MESSAGES, "");
146
- setlocale(LC_CTYPE, "C");
147
- errno = ENODEV;
148
- perror("test");
149
- return 0;
150
- }
151
-
152
- Running that will give you a message with question marks:
153
-
154
- $ LANGUAGE= LANG=de_DE.utf8 ./test
155
- test: Kein passendes Ger?t gefunden
156
-
157
- The vsnprintf bug has been fixed since glibc 2.17.
158
-
159
- Then we could simply set LC_CTYPE from the environment, which would
160
- make things like the external perror(3) messages work.
161
-
162
- See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for
163
- regression tests.
164
-
165
- 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530
166
- 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po
167
- */
168
90
setlocale (LC_CTYPE , "" );
169
91
charset = locale_charset ();
170
92
bind_textdomain_codeset (domain , charset );
171
- /* the string is taken from v0.99.6~1 */
93
+
94
+ /*
95
+ * Work around an old bug fixed in glibc 2.17 (released on
96
+ * 2012-12-24), at the cost of potentially having translated
97
+ * messages from external functions like perror() emitted in
98
+ * the wrong encoding.
99
+ *
100
+ * The bug affected e.g. git.git's own 7eb93c89651 ([PATCH]
101
+ * Simplify git script, 2005-09-07), which is the origin of
102
+ * the "David_K\345gedal" test string.
103
+ *
104
+ * See a much longer comment added to this file in 5e9637c6297
105
+ * (i18n: add infrastructure for translating Git with gettext,
106
+ * 2011-11-18) for more details.
107
+ */
172
108
if (test_vsnprintf ("%.*s" , 13 , "David_K\345gedal" ) < 0 )
173
109
setlocale (LC_CTYPE , "C" );
174
110
}
0 commit comments