Skip to content

Commit 9cac334

Browse files
committed
locale.c: Move 2 functions elsewhere in the code
This is in preparation for them to be called on platforms where locale handling is not enabled.
1 parent 525e8d0 commit 9cac334

File tree

4 files changed

+135
-138
lines changed

4 files changed

+135
-138
lines changed

embed.fnc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3330,17 +3330,18 @@ SG |bool |sv_derived_from_svpvn |NULLOK SV *sv \
33303330
#endif
33313331

33323332
#if defined(PERL_IN_LOCALE_C)
3333+
S |utf8ness_t|get_locale_string_utf8ness_i \
3334+
|NULLOK const char * string \
3335+
|const locale_utf8ness_t known_utf8 \
3336+
|NULLOK const char * locale \
3337+
|const unsigned cat_index
3338+
S |bool |is_locale_utf8 |NN const char * locale
33333339
# ifdef USE_LOCALE
33343340
iR |const char *|mortalized_pv_copy|NULLOK const char * const pv
33353341
ST |const char *|save_to_buffer|NULLOK const char * string \
33363342
|NULLOK const char **buf \
33373343
|NULLOK Size_t *buf_size
33383344
ST |unsigned int|get_category_index|const int category|NULLOK const char * locale
3339-
S |utf8ness_t|get_locale_string_utf8ness_i \
3340-
|NULLOK const char * string \
3341-
|const locale_utf8ness_t known_utf8 \
3342-
|NULLOK const char * locale \
3343-
|const unsigned cat_index
33443345
# ifdef USE_LOCALE_CTYPE
33453346
S |void |new_ctype |NN const char* newctype
33463347
ST |bool |is_codeset_name_UTF8|NN const char * name
@@ -3365,7 +3366,6 @@ So |const char *|toggle_locale_i|const unsigned switch_cat_index \
33653366
So |void |restore_toggled_locale_i|const unsigned cat_index \
33663367
|NULLOK const char * original_locale \
33673368
|const line_t caller_line
3368-
S |bool |is_locale_utf8 |NN const char * locale
33693369
# if (defined(HAS_LOCALECONV) || defined(HAS_LOCALECONV_L)) \
33703370
&& (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC))
33713371
S |HV * |my_localeconv|const int item

embed.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1763,10 +1763,10 @@
17631763
#define unshare_hek_or_pvn(a,b,c,d) S_unshare_hek_or_pvn(aTHX_ a,b,c,d)
17641764
# endif
17651765
# if defined(PERL_IN_LOCALE_C)
1766-
# if defined(USE_LOCALE)
1767-
#define get_category_index S_get_category_index
17681766
#define get_locale_string_utf8ness_i(a,b,c,d) S_get_locale_string_utf8ness_i(aTHX_ a,b,c,d)
17691767
#define is_locale_utf8(a) S_is_locale_utf8(aTHX_ a)
1768+
# if defined(USE_LOCALE)
1769+
#define get_category_index S_get_category_index
17701770
#define mortalized_pv_copy(a) S_mortalized_pv_copy(aTHX_ a)
17711771
#define new_LC_ALL(a) S_new_LC_ALL(aTHX_ a)
17721772
#define save_to_buffer S_save_to_buffer

locale.c

Lines changed: 124 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -2869,94 +2869,13 @@ Perl_setlocale(const int category, const char * locale)
28692869

28702870
}
28712871

2872-
#ifdef USE_LOCALE
2873-
2874-
STATIC const char *
2875-
S_save_to_buffer(const char * string, const char **buf, Size_t *buf_size)
2876-
{
2877-
/* Copy the NUL-terminated 'string' to a buffer whose address before this
2878-
* call began at *buf, and whose available length before this call was
2879-
* *buf_size.
2880-
*
2881-
* If the length of 'string' is greater than the space available, the
2882-
* buffer is grown accordingly, which may mean that it gets relocated.
2883-
* *buf and *buf_size will be updated to reflect this.
2884-
*
2885-
* Regardless, the function returns a pointer to where 'string' is now
2886-
* stored.
2887-
*
2888-
* 'string' may be NULL, which means no action gets taken, and NULL is
2889-
* returned.
2890-
*
2891-
* If *buf or 'buf_size' are NULL or *buf_size is 0, the buffer is assumed
2892-
* empty, and memory is malloc'd. 'buf-size' being NULL is to be used
2893-
* when this is a single use buffer, which will shortly be freed by the
2894-
* caller.
2895-
*/
2896-
2897-
Size_t string_size;
2898-
2899-
PERL_ARGS_ASSERT_SAVE_TO_BUFFER;
2900-
2901-
if (! string) {
2902-
return NULL;
2903-
}
2904-
2905-
/* No-op to copy over oneself */
2906-
if (string == *buf) {
2907-
return string;
2908-
}
2909-
2910-
string_size = strlen(string) + 1;
2911-
2912-
if (buf_size == NULL) {
2913-
Newx(*buf, string_size, char);
2914-
}
2915-
else if (*buf_size == 0) {
2916-
Newx(*buf, string_size, char);
2917-
*buf_size = string_size;
2918-
}
2919-
else if (string_size > *buf_size) {
2920-
Renew(*buf, string_size, char);
2921-
*buf_size = string_size;
2922-
}
2923-
2924-
{
2925-
dTHX_DEBUGGING;
2926-
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
2927-
"Copying '%s' to %p\n",
2928-
((is_utf8_string((U8 *) string, 0))
2929-
? string
2930-
:_byte_dump_string((U8 *) string, strlen(string), 0)),
2931-
*buf));
2932-
}
2933-
2934-
# ifdef DEBUGGING
2935-
2936-
/* Catch glitches. Usually this is because LC_CTYPE needs to be the same
2937-
* locale as whatever is being worked on */
2938-
if (UNLIKELY(instr(string, REPLACEMENT_CHARACTER_UTF8))) {
2939-
dTHX_DEBUGGING;
2940-
2941-
locale_panic_(Perl_form(aTHX_
2942-
"Unexpected REPLACEMENT_CHARACTER in '%s'\n%s",
2943-
string, get_LC_ALL_display()));
2944-
}
2945-
2946-
# endif
2947-
2948-
Copy(string, *buf, string_size, char);
2949-
return *buf;
2950-
}
2951-
2952-
#endif
2953-
29542872
STATIC utf8ness_t
29552873
S_get_locale_string_utf8ness_i(pTHX_ const char * string,
29562874
const locale_utf8ness_t known_utf8,
29572875
const char * locale,
29582876
const unsigned cat_index)
29592877
{
2878+
PERL_ARGS_ASSERT_GET_LOCALE_STRING_UTF8NESS_I;
29602879

29612880
#ifndef USE_LOCALE
29622881

@@ -3059,7 +2978,130 @@ S_get_locale_string_utf8ness_i(pTHX_ const char * string,
30592978

30602979
}
30612980

2981+
STATIC bool
2982+
S_is_locale_utf8(pTHX_ const char * locale)
2983+
{
2984+
/* Returns TRUE if the locale 'locale' is UTF-8; FALSE otherwise. It uses
2985+
* my_langinfo(), which employs various methods to get this information
2986+
* if nl_langinfo() isn't available, using heuristics as a last resort, in
2987+
* which case, the result will very likely be correct for locales for
2988+
* languages that have commonly used non-ASCII characters, but for notably
2989+
* English, it comes down to if the locale's name ends in something like
2990+
* "UTF-8". It errs on the side of not being a UTF-8 locale. */
2991+
2992+
# if ! defined(USE_LOCALE_CTYPE) \
2993+
|| defined(EBCDIC) /* There aren't any real UTF-8 locales at this time */
2994+
2995+
PERL_UNUSED_ARG(locale);
2996+
2997+
return FALSE;
2998+
2999+
# else
3000+
3001+
const char * scratch_buffer = NULL;
3002+
const char * codeset;
3003+
bool retval;
3004+
3005+
PERL_ARGS_ASSERT_IS_LOCALE_UTF8;
3006+
3007+
if (strEQ(locale, PL_ctype_name)) {
3008+
return PL_in_utf8_CTYPE_locale;
3009+
}
3010+
3011+
codeset = my_langinfo_c(CODESET, LC_CTYPE, locale,
3012+
&scratch_buffer, NULL, NULL);
3013+
retval = is_codeset_name_UTF8(codeset);
3014+
3015+
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
3016+
"found codeset=%s, is_utf8=%d\n", codeset, retval));
3017+
3018+
Safefree(scratch_buffer);
3019+
return retval;
3020+
3021+
# endif
3022+
3023+
}
3024+
30623025
#ifdef USE_LOCALE
3026+
3027+
STATIC const char *
3028+
S_save_to_buffer(const char * string, const char **buf, Size_t *buf_size)
3029+
{
3030+
/* Copy the NUL-terminated 'string' to a buffer whose address before this
3031+
* call began at *buf, and whose available length before this call was
3032+
* *buf_size.
3033+
*
3034+
* If the length of 'string' is greater than the space available, the
3035+
* buffer is grown accordingly, which may mean that it gets relocated.
3036+
* *buf and *buf_size will be updated to reflect this.
3037+
*
3038+
* Regardless, the function returns a pointer to where 'string' is now
3039+
* stored.
3040+
*
3041+
* 'string' may be NULL, which means no action gets taken, and NULL is
3042+
* returned.
3043+
*
3044+
* If *buf or 'buf_size' are NULL or *buf_size is 0, the buffer is assumed
3045+
* empty, and memory is malloc'd. 'buf-size' being NULL is to be used
3046+
* when this is a single use buffer, which will shortly be freed by the
3047+
* caller.
3048+
*/
3049+
3050+
Size_t string_size;
3051+
3052+
PERL_ARGS_ASSERT_SAVE_TO_BUFFER;
3053+
3054+
if (! string) {
3055+
return NULL;
3056+
}
3057+
3058+
/* No-op to copy over oneself */
3059+
if (string == *buf) {
3060+
return string;
3061+
}
3062+
3063+
string_size = strlen(string) + 1;
3064+
3065+
if (buf_size == NULL) {
3066+
Newx(*buf, string_size, char);
3067+
}
3068+
else if (*buf_size == 0) {
3069+
Newx(*buf, string_size, char);
3070+
*buf_size = string_size;
3071+
}
3072+
else if (string_size > *buf_size) {
3073+
Renew(*buf, string_size, char);
3074+
*buf_size = string_size;
3075+
}
3076+
3077+
{
3078+
dTHX_DEBUGGING;
3079+
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
3080+
"Copying '%s' to %p\n",
3081+
((is_utf8_string((U8 *) string, 0))
3082+
? string
3083+
:_byte_dump_string((U8 *) string, strlen(string), 0)),
3084+
*buf));
3085+
}
3086+
3087+
# ifdef DEBUGGING
3088+
3089+
/* Catch glitches. Usually this is because LC_CTYPE needs to be the same
3090+
* locale as whatever is being worked on */
3091+
if (UNLIKELY(instr(string, REPLACEMENT_CHARACTER_UTF8))) {
3092+
dTHX_DEBUGGING;
3093+
3094+
locale_panic_(Perl_form(aTHX_
3095+
"Unexpected REPLACEMENT_CHARACTER in '%s'\n%s",
3096+
string, get_LC_ALL_display()));
3097+
}
3098+
3099+
# endif
3100+
3101+
Copy(string, *buf, string_size, char);
3102+
return *buf;
3103+
}
3104+
30633105
# ifdef WIN32
30643106

30653107
bool
@@ -6255,52 +6297,7 @@ S_is_codeset_name_UTF8(const char * name)
62556297
&& (len == 4 || name[3] == '-'));
62566298
}
62576299

6258-
#endif
6259-
6260-
STATIC bool
6261-
S_is_locale_utf8(pTHX_ const char * locale)
6262-
{
6263-
/* Returns TRUE if the locale 'locale' is UTF-8; FALSE otherwise. It uses
6264-
* my_langinfo(), which employs various methods to get this information
6265-
* if nl_langinfo() isn't available, using heuristics as a last resort, in
6266-
* which case, the result will very likely be correct for locales for
6267-
* languages that have commonly used non-ASCII characters, but for notably
6268-
* English, it comes down to if the locale's name ends in something like
6269-
* "UTF-8". It errs on the side of not being a UTF-8 locale. */
6270-
6271-
# if ! defined(USE_LOCALE_CTYPE) \
6272-
|| defined(EBCDIC) /* There aren't any real UTF-8 locales at this time */
6273-
6274-
PERL_UNUSED_ARG(locale);
6275-
6276-
return FALSE;
6277-
6278-
# else
6279-
6280-
const char * scratch_buffer = NULL;
6281-
const char * codeset;
6282-
bool retval;
6283-
6284-
PERL_ARGS_ASSERT_IS_LOCALE_UTF8;
6285-
6286-
if (strEQ(locale, PL_ctype_name)) {
6287-
return PL_in_utf8_CTYPE_locale;
6288-
}
6289-
6290-
codeset = my_langinfo_c(CODESET, LC_CTYPE, locale,
6291-
&scratch_buffer, NULL, NULL);
6292-
retval = is_codeset_name_UTF8(codeset);
6293-
6294-
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
6295-
"found codeset=%s, is_utf8=%d\n", codeset, retval));
6296-
6297-
Safefree(scratch_buffer);
6298-
return retval;
6299-
63006300
# endif
6301-
6302-
}
6303-
63046301
#endif /* USE_LOCALE */
63056302

63066303
bool

proto.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5736,14 +5736,14 @@ PERL_CALLCONV SV* Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp)
57365736

57375737
#endif
57385738
#if defined(PERL_IN_LOCALE_C)
5739-
# if defined(USE_LOCALE)
5740-
STATIC unsigned int S_get_category_index(const int category, const char * locale);
5741-
#define PERL_ARGS_ASSERT_GET_CATEGORY_INDEX
57425739
STATIC utf8ness_t S_get_locale_string_utf8ness_i(pTHX_ const char * string, const locale_utf8ness_t known_utf8, const char * locale, const unsigned cat_index);
57435740
#define PERL_ARGS_ASSERT_GET_LOCALE_STRING_UTF8NESS_I
57445741
STATIC bool S_is_locale_utf8(pTHX_ const char * locale);
57455742
#define PERL_ARGS_ASSERT_IS_LOCALE_UTF8 \
57465743
assert(locale)
5744+
# if defined(USE_LOCALE)
5745+
STATIC unsigned int S_get_category_index(const int category, const char * locale);
5746+
#define PERL_ARGS_ASSERT_GET_CATEGORY_INDEX
57475747
#ifndef PERL_NO_INLINE_FUNCTIONS
57485748
PERL_STATIC_INLINE const char * S_mortalized_pv_copy(pTHX_ const char * const pv)
57495749
__attribute__warn_unused_result__;

0 commit comments

Comments
 (0)