Skip to content

Commit 6af0187

Browse files
committed
locale.c: Rewrite localeconv() handling
localeconv() returns a structure containing fields that are associated with two different categories: LC_NUMERIC and LC_MONETARY. Perl, via POSIX::localeconv(), returns a hash containing all the fields. Testing on Windows showed that if LC_CTYPE is not the same locale as LC_MONETARY for the monetary fields, or isn't the same as LC_NUMERIC for the numeric ones, mojibake can result. The solution to similar situations elsewhere in the code is to toggle LC_CTYPE into being the same locale as the one for the returned fields. But those situations only have a single locale that LC_CTYPE has to match, so it doesn't work here when LC_NUMERIC and LC_MONETARY are different locales. Unlike Schrödinger's cat, LC_CTYPE has to be one or the other, not both at the same time. The previous implementation did not consider this possibility, and wasn't easily changeable to work. Therefore, this commit rewrites a bunch of it. The solution used is to call localeconv() twice when the LC_NUMERIC locale and the LC_MONETARY locale don't match (with LC_CTYPE toggled to the corresponding one each time). (Only one call is made if the two categories have the same locale.) Handling one call versus two complicated the code, but I thought it was worth it given that the single call is the most likely case. Another complication is that on platforms that lack nl_langinfo() (Windows, for example), localeconv() is used to emulate portions of it. Previously there was a separate function to handle this, using an SV() cast as an HV() to avoid using a hash that wasn't actually necessary. That proved to lead to extra duplicated code under the new scheme, so the two functions were collapsed into a single one and a real hash is used in all circumstances, but is only populated with the one or two fields needed for the emulation.
The only part of this commit that I thought could be split off from the rest concerns the fact that localeconv()'s return is not thread-safe, and so must be copied to a safe place (the hash) while in a critical section, locking out all other threads. Before this commit, that copying was accompanied by determining if each string field needed to be marked as UTF-8. That determination isn't necessarily trivial, so should really not be in the critical section. This commit moves it out of the critical section. With some effort, that part could have been split into a separate commit, but I didn't think it was worth the effort.
1 parent 2fc8f38 commit 6af0187

File tree

5 files changed

+569
-399
lines changed

5 files changed

+569
-399
lines changed

embed.fnc

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3336,6 +3336,15 @@ S |utf8ness_t|get_locale_string_utf8ness_i \
33363336
|NULLOK const char * locale \
33373337
|const unsigned cat_index
33383338
S |bool |is_locale_utf8 |NN const char * locale
3339+
# ifdef HAS_LOCALECONV
3340+
S |HV * |my_localeconv |const int item
3341+
S |void |populate_hash_from_localeconv \
3342+
|NN HV * hv \
3343+
|NN const char * locale \
3344+
|const U32 which_mask \
3345+
|NN const lconv_offset_t * strings[2] \
3346+
|NULLOK const lconv_offset_t * integers
3347+
# endif
33393348
# ifdef USE_LOCALE
33403349
iR |const char *|mortalized_pv_copy|NULLOK const char * const pv
33413350
ST |const char *|save_to_buffer|NULLOK const char * string \
@@ -3366,21 +3375,6 @@ So |const char *|toggle_locale_i|const unsigned switch_cat_index \
33663375
So |void |restore_toggled_locale_i|const unsigned cat_index \
33673376
|NULLOK const char * original_locale \
33683377
|const line_t caller_line
3369-
# if (defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)) \
3370-
&& (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC))
3371-
S |HV * |my_localeconv|const int item
3372-
S |HV * |populate_localeconv|NN const struct lconv *lcbuf \
3373-
|const int unused \
3374-
|const locale_utf8ness_t numeric_locale_is_utf8 \
3375-
|const locale_utf8ness_t monetary_locale_is_utf8
3376-
# if ! defined(HAS_NL_LANGINFO_L) && ! defined(HAS_NL_LANGINFO)
3377-
S |HV * |get_nl_item_from_localeconv \
3378-
|NN const struct lconv *lcbuf \
3379-
|const int item \
3380-
|const locale_utf8ness_t unused1 \
3381-
|const locale_utf8ness_t unused2
3382-
# endif
3383-
# endif
33843378
# if defined(USE_POSIX_2008_LOCALE)
33853379
S |const char*|emulate_setlocale_i|const unsigned int index \
33863380
|NULLOK const char* new_locale \

embed.h

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,15 +1492,6 @@
14921492
#define yyparse(a) Perl_yyparse(aTHX_ a)
14931493
#define yyquit() Perl_yyquit(aTHX)
14941494
#define yyunlex() Perl_yyunlex(aTHX)
1495-
# if ! defined(HAS_NL_LANGINFO_L) && ! defined(HAS_NL_LANGINFO)
1496-
# if (defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)) && (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC))
1497-
# if defined(PERL_IN_LOCALE_C)
1498-
# if defined(USE_LOCALE)
1499-
#define get_nl_item_from_localeconv(a,b,c,d) S_get_nl_item_from_localeconv(aTHX_ a,b,c,d)
1500-
# endif
1501-
# endif
1502-
# endif
1503-
# endif
15041495
# if !(defined(DEBUGGING))
15051496
# if !defined(NV_PRESERVES_UV)
15061497
# if defined(PERL_IN_SV_C)
@@ -1608,14 +1599,6 @@
16081599
# if !defined(WIN32)
16091600
#define do_exec3(a,b,c) Perl_do_exec3(aTHX_ a,b,c)
16101601
# endif
1611-
# if (defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)) && (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC))
1612-
# if defined(PERL_IN_LOCALE_C)
1613-
# if defined(USE_LOCALE)
1614-
#define my_localeconv(a) S_my_localeconv(aTHX_ a)
1615-
#define populate_localeconv(a,b,c,d) S_populate_localeconv(aTHX_ a,b,c,d)
1616-
# endif
1617-
# endif
1618-
# endif
16191602
# if 0 /* Not currently used, but may be needed in the future */
16201603
# if defined(PERL_IN_UTF8_C)
16211604
#define warn_on_first_deprecated_use(a,b,c,d,e) S_warn_on_first_deprecated_use(aTHX_ a,b,c,d,e)
@@ -1661,6 +1644,12 @@
16611644
# if defined(DEBUG_LEAKING_SCALARS_FORK_DUMP)
16621645
#define dump_sv_child(a) Perl_dump_sv_child(aTHX_ a)
16631646
# endif
1647+
# if defined(HAS_LOCALECONV)
1648+
# if defined(PERL_IN_LOCALE_C)
1649+
#define my_localeconv(a) S_my_localeconv(aTHX_ a)
1650+
#define populate_hash_from_localeconv(a,b,c,d,e) S_populate_hash_from_localeconv(aTHX_ a,b,c,d,e)
1651+
# endif
1652+
# endif
16641653
# if defined(HAS_MSG) || defined(HAS_SEM) || defined(HAS_SHM)
16651654
#define do_ipcctl(a,b,c) Perl_do_ipcctl(aTHX_ a,b,c)
16661655
#define do_ipcget(a,b,c) Perl_do_ipcget(aTHX_ a,b,c)

0 commit comments

Comments
 (0)