@@ -724,6 +724,10 @@ S_does_utf8_overflow(const U8 * const s, const U8 * e)
724724STRLEN
725725Perl_is_utf8_char_helper_ (const U8 * const s , const U8 * e , const U32 flags )
726726{
727+ PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_ ;
728+ assert (e > s );
729+ assert (0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE ));
730+
727731 SSize_t len , full_len ;
728732
729733 /* An internal helper function.
@@ -751,12 +755,6 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
751755 * determined with just the first one or two bytes.
752756 *
753757 */
754-
755- PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_ ;
756-
757- assert (e > s );
758- assert (0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE ));
759-
760758 full_len = UTF8SKIP (s );
761759
762760 len = e - s ;
@@ -841,6 +839,10 @@ Size_t
841839Perl_is_utf8_FF_helper_ (const U8 * const s0 , const U8 * const e ,
842840 const bool require_partial )
843841{
842+ PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_ ;
843+ assert (s0 < e );
844+ assert (* s0 == I8_TO_NATIVE_UTF8 (0xFF ));
845+
844846 /* This is called to determine if the UTF-8 sequence starting at s0 and
845847 * continuing for up to one full character of bytes, but looking no further
846848 * than 'e - 1', is legal. *s0 must be 0xFF (or whatever the native
@@ -867,11 +869,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e,
867869 const U8 * s = s0 + 1 ;
868870 const U8 * send = e ;
869871
870- PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_ ;
871-
872- assert (s0 < e );
873- assert (* s0 == I8_TO_NATIVE_UTF8 (0xFF ));
874-
875872 send = s + MIN (UTF8_MAXBYTES - 1 , e - s );
876873 while (s < send ) {
877874 if (! UTF8_IS_CONTINUATION (* s )) {
@@ -4247,6 +4244,9 @@ STATIC UV
42474244S_turkic_fc (pTHX_ const U8 * const p , const U8 * const e ,
42484245 U8 * ustrp , STRLEN * lenp )
42494246{
4247+ PERL_ARGS_ASSERT_TURKIC_FC ;
4248+ assert (e > p );
4249+
42504250 /* Returns 0 if the foldcase of the input UTF-8 encoded sequence from
42514251 * p..e-1 according to Turkic rules is the same as for non-Turkic.
42524252 * Otherwise, it returns the first code point of the Turkic foldcased
@@ -4257,9 +4257,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e,
42574257 * I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER
42584258 * DOTLESS I */
42594259
4260- PERL_ARGS_ASSERT_TURKIC_FC ;
4261- assert (e > p );
4262-
42634260 if (UNLIKELY (* p == 'I' )) {
42644261 * lenp = 2 ;
42654262 ustrp [0 ] = UTF8_TWO_BYTE_HI (LATIN_SMALL_LETTER_DOTLESS_I );
@@ -4282,15 +4279,15 @@ STATIC UV
42824279S_turkic_lc (pTHX_ const U8 * const p0 , const U8 * const e ,
42834280 U8 * ustrp , STRLEN * lenp )
42844281{
4282+ PERL_ARGS_ASSERT_TURKIC_LC ;
4283+ assert (e > p0 );
4284+
42854285 /* Returns 0 if the lowercase of the input UTF-8 encoded sequence from
42864286 * p0..e-1 according to Turkic rules is the same as for non-Turkic.
42874287 * Otherwise, it returns the first code point of the Turkic lowercased
42884288 * sequence, and the entire sequence will be stored in *ustrp. ustrp will
42894289 * contain *lenp bytes */
42904290
4291- PERL_ARGS_ASSERT_TURKIC_LC ;
4292- assert (e > p0 );
4293-
42944291 /* An 'I' requires context as to what to do */
42954292 if (UNLIKELY (* p0 == 'I' )) {
42964293 const U8 * p = p0 + 1 ;
@@ -4328,6 +4325,9 @@ STATIC UV
43284325S_turkic_uc (pTHX_ const U8 * const p , const U8 * const e ,
43294326 U8 * ustrp , STRLEN * lenp )
43304327{
4328+ PERL_ARGS_ASSERT_TURKIC_UC ;
4329+ assert (e > p );
4330+
43314331 /* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence
43324332 * from p..e-1 according to Turkic rules is the same as for non-Turkic.
43334333 * Otherwise, it returns the first code point of the Turkic upper or
@@ -4338,9 +4338,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
43384338 * I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER
43394339 * DOTLESS I */
43404340
4341- PERL_ARGS_ASSERT_TURKIC_UC ;
4342- assert (e > p );
4343-
43444341 if (* p == 'i' ) {
43454342 * lenp = 2 ;
43464343 ustrp [0 ] = UTF8_TWO_BYTE_HI (LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE );
0 commit comments