@@ -2869,94 +2869,13 @@ Perl_setlocale(const int category, const char * locale)
2869
2869
2870
2870
}
2871
2871
2872
- #ifdef USE_LOCALE
2873
-
2874
- STATIC const char *
2875
- S_save_to_buffer (const char * string , const char * * buf , Size_t * buf_size )
2876
- {
2877
- /* Copy the NUL-terminated 'string' to a buffer whose address before this
2878
- * call began at *buf, and whose available length before this call was
2879
- * *buf_size.
2880
- *
2881
- * If the length of 'string' is greater than the space available, the
2882
- * buffer is grown accordingly, which may mean that it gets relocated.
2883
- * *buf and *buf_size will be updated to reflect this.
2884
- *
2885
- * Regardless, the function returns a pointer to where 'string' is now
2886
- * stored.
2887
- *
2888
- * 'string' may be NULL, which means no action gets taken, and NULL is
2889
- * returned.
2890
- *
2891
- * If *buf or 'buf_size' are NULL or *buf_size is 0, the buffer is assumed
2892
- * empty, and memory is malloc'd. 'buf-size' being NULL is to be used
2893
- * when this is a single use buffer, which will shortly be freed by the
2894
- * caller.
2895
- */
2896
-
2897
- Size_t string_size ;
2898
-
2899
- PERL_ARGS_ASSERT_SAVE_TO_BUFFER ;
2900
-
2901
- if (! string ) {
2902
- return NULL ;
2903
- }
2904
-
2905
- /* No-op to copy over oneself */
2906
- if (string == * buf ) {
2907
- return string ;
2908
- }
2909
-
2910
- string_size = strlen (string ) + 1 ;
2911
-
2912
- if (buf_size == NULL ) {
2913
- Newx (* buf , string_size , char );
2914
- }
2915
- else if (* buf_size == 0 ) {
2916
- Newx (* buf , string_size , char );
2917
- * buf_size = string_size ;
2918
- }
2919
- else if (string_size > * buf_size ) {
2920
- Renew (* buf , string_size , char );
2921
- * buf_size = string_size ;
2922
- }
2923
-
2924
- {
2925
- dTHX_DEBUGGING ;
2926
- DEBUG_Lv (PerlIO_printf (Perl_debug_log ,
2927
- "Copying '%s' to %p\n" ,
2928
- ((is_utf8_string ((U8 * ) string , 0 ))
2929
- ? string
2930
- :_byte_dump_string ((U8 * ) string , strlen (string ), 0 )),
2931
- * buf ));
2932
- }
2933
-
2934
- # ifdef DEBUGGING
2935
-
2936
- /* Catch glitches. Usually this is because LC_CTYPE needs to be the same
2937
- * locale as whatever is being worked on */
2938
- if (UNLIKELY (instr (string , REPLACEMENT_CHARACTER_UTF8 ))) {
2939
- dTHX_DEBUGGING ;
2940
-
2941
- locale_panic_ (Perl_form (aTHX_
2942
- "Unexpected REPLACEMENT_CHARACTER in '%s'\n%s" ,
2943
- string , get_LC_ALL_display ()));
2944
- }
2945
-
2946
- # endif
2947
-
2948
- Copy (string , * buf , string_size , char );
2949
- return * buf ;
2950
- }
2951
-
2952
- #endif
2953
-
2954
2872
STATIC utf8ness_t
2955
2873
S_get_locale_string_utf8ness_i (pTHX_ const char * string ,
2956
2874
const locale_utf8ness_t known_utf8 ,
2957
2875
const char * locale ,
2958
2876
const unsigned cat_index )
2959
2877
{
2878
+ PERL_ARGS_ASSERT_GET_LOCALE_STRING_UTF8NESS_I ;
2960
2879
2961
2880
#ifndef USE_LOCALE
2962
2881
@@ -3059,7 +2978,130 @@ S_get_locale_string_utf8ness_i(pTHX_ const char * string,
3059
2978
3060
2979
}
3061
2980
2981
+ STATIC bool
2982
+ S_is_locale_utf8 (pTHX_ const char * locale )
2983
+ {
2984
+ /* Returns TRUE if the locale 'locale' is UTF-8; FALSE otherwise. It uses
2985
+ * my_langinfo(), which employs various methods to get this information
2986
+ * if nl_langinfo() isn't available, using heuristics as a last resort, in
2987
+ * which case, the result will very likely be correct for locales for
2988
+ * languages that have commonly used non-ASCII characters, but for notably
2989
+ * English, it comes down to if the locale's name ends in something like
2990
+ * "UTF-8". It errs on the side of not being a UTF-8 locale. */
2991
+
2992
+ # if ! defined(USE_LOCALE_CTYPE ) \
2993
+ || defined(EBCDIC ) /* There aren't any real UTF-8 locales at this time */
2994
+
2995
+ PERL_UNUSED_ARG (locale );
2996
+
2997
+ return FALSE;
2998
+
2999
+ # else
3000
+
3001
+ const char * scratch_buffer = NULL ;
3002
+ const char * codeset ;
3003
+ bool retval ;
3004
+
3005
+ PERL_ARGS_ASSERT_IS_LOCALE_UTF8 ;
3006
+
3007
+ if (strEQ (locale , PL_ctype_name )) {
3008
+ return PL_in_utf8_CTYPE_locale ;
3009
+ }
3010
+
3011
+ codeset = my_langinfo_c (CODESET , LC_CTYPE , locale ,
3012
+ & scratch_buffer , NULL , NULL );
3013
+ retval = is_codeset_name_UTF8 (codeset );
3014
+
3015
+ DEBUG_Lv (PerlIO_printf (Perl_debug_log ,
3016
+ "found codeset=%s, is_utf8=%d\n" , codeset , retval ));
3017
+
3018
+ Safefree (scratch_buffer );
3019
+ return retval ;
3020
+
3021
+ # endif
3022
+
3023
+ }
3024
+
3062
3025
#ifdef USE_LOCALE
3026
+
3027
+ STATIC const char *
3028
+ S_save_to_buffer (const char * string , const char * * buf , Size_t * buf_size )
3029
+ {
3030
+ /* Copy the NUL-terminated 'string' to a buffer whose address before this
3031
+ * call began at *buf, and whose available length before this call was
3032
+ * *buf_size.
3033
+ *
3034
+ * If the length of 'string' is greater than the space available, the
3035
+ * buffer is grown accordingly, which may mean that it gets relocated.
3036
+ * *buf and *buf_size will be updated to reflect this.
3037
+ *
3038
+ * Regardless, the function returns a pointer to where 'string' is now
3039
+ * stored.
3040
+ *
3041
+ * 'string' may be NULL, which means no action gets taken, and NULL is
3042
+ * returned.
3043
+ *
3044
+ * If *buf or 'buf_size' are NULL or *buf_size is 0, the buffer is assumed
3045
+ * empty, and memory is malloc'd. 'buf-size' being NULL is to be used
3046
+ * when this is a single use buffer, which will shortly be freed by the
3047
+ * caller.
3048
+ */
3049
+
3050
+ Size_t string_size ;
3051
+
3052
+ PERL_ARGS_ASSERT_SAVE_TO_BUFFER ;
3053
+
3054
+ if (! string ) {
3055
+ return NULL ;
3056
+ }
3057
+
3058
+ /* No-op to copy over oneself */
3059
+ if (string == * buf ) {
3060
+ return string ;
3061
+ }
3062
+
3063
+ string_size = strlen (string ) + 1 ;
3064
+
3065
+ if (buf_size == NULL ) {
3066
+ Newx (* buf , string_size , char );
3067
+ }
3068
+ else if (* buf_size == 0 ) {
3069
+ Newx (* buf , string_size , char );
3070
+ * buf_size = string_size ;
3071
+ }
3072
+ else if (string_size > * buf_size ) {
3073
+ Renew (* buf , string_size , char );
3074
+ * buf_size = string_size ;
3075
+ }
3076
+
3077
+ {
3078
+ dTHX_DEBUGGING ;
3079
+ DEBUG_Lv (PerlIO_printf (Perl_debug_log ,
3080
+ "Copying '%s' to %p\n" ,
3081
+ ((is_utf8_string ((U8 * ) string , 0 ))
3082
+ ? string
3083
+ :_byte_dump_string ((U8 * ) string , strlen (string ), 0 )),
3084
+ * buf ));
3085
+ }
3086
+
3087
+ # ifdef DEBUGGING
3088
+
3089
+ /* Catch glitches. Usually this is because LC_CTYPE needs to be the same
3090
+ * locale as whatever is being worked on */
3091
+ if (UNLIKELY (instr (string , REPLACEMENT_CHARACTER_UTF8 ))) {
3092
+ dTHX_DEBUGGING ;
3093
+
3094
+ locale_panic_ (Perl_form (aTHX_
3095
+ "Unexpected REPLACEMENT_CHARACTER in '%s'\n%s" ,
3096
+ string , get_LC_ALL_display ()));
3097
+ }
3098
+
3099
+ # endif
3100
+
3101
+ Copy (string , * buf , string_size , char );
3102
+ return * buf ;
3103
+ }
3104
+
3063
3105
# ifdef WIN32
3064
3106
3065
3107
bool
@@ -6255,52 +6297,7 @@ S_is_codeset_name_UTF8(const char * name)
6255
6297
&& (len == 4 || name [3 ] == '-' ));
6256
6298
}
6257
6299
6258
- #endif
6259
-
6260
- STATIC bool
6261
- S_is_locale_utf8 (pTHX_ const char * locale )
6262
- {
6263
- /* Returns TRUE if the locale 'locale' is UTF-8; FALSE otherwise. It uses
6264
- * my_langinfo(), which employs various methods to get this information
6265
- * if nl_langinfo() isn't available, using heuristics as a last resort, in
6266
- * which case, the result will very likely be correct for locales for
6267
- * languages that have commonly used non-ASCII characters, but for notably
6268
- * English, it comes down to if the locale's name ends in something like
6269
- * "UTF-8". It errs on the side of not being a UTF-8 locale. */
6270
-
6271
- # if ! defined(USE_LOCALE_CTYPE ) \
6272
- || defined(EBCDIC ) /* There aren't any real UTF-8 locales at this time */
6273
-
6274
- PERL_UNUSED_ARG (locale );
6275
-
6276
- return FALSE;
6277
-
6278
- # else
6279
-
6280
- const char * scratch_buffer = NULL ;
6281
- const char * codeset ;
6282
- bool retval ;
6283
-
6284
- PERL_ARGS_ASSERT_IS_LOCALE_UTF8 ;
6285
-
6286
- if (strEQ (locale , PL_ctype_name )) {
6287
- return PL_in_utf8_CTYPE_locale ;
6288
- }
6289
-
6290
- codeset = my_langinfo_c (CODESET , LC_CTYPE , locale ,
6291
- & scratch_buffer , NULL , NULL );
6292
- retval = is_codeset_name_UTF8 (codeset );
6293
-
6294
- DEBUG_Lv (PerlIO_printf (Perl_debug_log ,
6295
- "found codeset=%s, is_utf8=%d\n" , codeset , retval ));
6296
-
6297
- Safefree (scratch_buffer );
6298
- return retval ;
6299
-
6300
6300
# endif
6301
-
6302
- }
6303
-
6304
6301
#endif /* USE_LOCALE */
6305
6302
6306
6303
bool
0 commit comments