@@ -9451,11 +9451,32 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
94519451#undef GET_DESCRIPTION
94529452#ifdef USE_LOCALE_COLLATE
94539453
9454- STATIC void
9454+ STATIC bool
94559455S_compute_collxfrm_coefficients (pTHX )
94569456{
9457-
9458- /* A locale collation definition includes primary, secondary, tertiary,
9457+ /* This is called from mem_collxfrm() the first time the latter is called
9458+ * on the current locale to do initialization for it.
9459+ *
9460+ * This returns true and initializes the coefficients for a linear equation
9461+ * that, given a string of some length, predicts how much memory it will
9462+ * take to hold the result of calling mem_collxfrm() on that string. The
9463+ * equation is of the form:
9464+ * m * length + b
9465+ * where m = PL_collxfrm_mult and b = PL_collxfrm_base
9466+ *
9467+ * It returns false if the locale does not appear to be sane.
9468+ *
9469+ * The prediction is just an educated guess to save time, and
9470+ * mem_collxfrm() may adjust it based on experience with strings it
9471+ * encounters.
9472+ *
9473+ * This function also:
9474+ * sets 'PL_in_utf8_COLLATE_locale' to indicate if the locale is a
9475+ * UTF-8 one
9476+ * initializes 'PL_strxfrm_NUL_replacement' to NUL
9477+ * initializes 'PL_strxfrm_max_cp' to 0
9478+ *
9479+ * A locale collation definition includes primary, secondary, tertiary,
94599480 * etc. weights for each character. To sort, the primary weights are used,
94609481 * and only if they compare equal, then the secondary weights are used, and
94619482 * only if they compare equal, then the tertiary, etc.
@@ -9564,8 +9585,9 @@ S_compute_collxfrm_coefficients(pTHX)
95649585 "Disabling locale collation for LC_COLLATE='%s';"
95659586 " length for shorter sample=%zu; longer=%zu\n" ,
95669587 PL_collation_name , x_len_shorter , x_len_longer ));
9588+ return false;
95679589 }
9568- else {
9590+
95699591 SSize_t base ; /* Temporary */
95709592
95719593 /* We have both: m * strlen(longer) + b = x_len_longer
@@ -9597,7 +9619,6 @@ S_compute_collxfrm_coefficients(pTHX)
95979619
95989620 /* Add 1 for the trailing NUL */
95999621 PL_collxfrm_base = base + 1 ;
9600- }
96019622
96029623 DEBUG_L (PerlIO_printf (Perl_debug_log ,
96039624 "?UTF-8 locale=%d; x_len_shorter=%zu, "
@@ -9606,6 +9627,7 @@ S_compute_collxfrm_coefficients(pTHX)
96069627 PL_in_utf8_COLLATE_locale ,
96079628 x_len_shorter , x_len_longer ,
96089629 PL_collxfrm_mult , PL_collxfrm_base ));
9630+ return true;
96099631}
96109632
96119633char *
@@ -9668,7 +9690,9 @@ Perl_mem_collxfrm_(pTHX_ const char *input_string,
96689690
96699691 /* (mult, base) == (0,0) means we need to calculate mult and base
96709692 * before proceeding */
9671- S_compute_collxfrm_coefficients (aTHX );
9693+ if (! S_compute_collxfrm_coefficients (aTHX )) {
9694+ return NULL ; /* locale collation not sane */
9695+ }
96729696 }
96739697
96749698 /* Replace any embedded NULs with the control that sorts before any others.
0 commit comments