Move some ARGS_ASSERT macros to function start

khwilliamson · khwilliamson · commit 3a7b25bc8b5e · 2025-10-12T13:53:12.000-06:00
Historically, the asserts had to be placed after all of a function's
declarations, because the C89 Standard does not allow declarations to
follow statements.  C99, which we now follow, removed that limitation.

Placing the assertions at the beginning of the function is clearer, and
prevents the code below them (including initializers in declarations)
from using a variable before it has been checked by its assertion.
diff --git a/regexec.c b/regexec.c
@@ -626,14 +626,13 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e)
 STATIC U8 *
 S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
 {
+    PERL_ARGS_ASSERT_FIND_SPAN_END;
+    assert(send >= s);
+
     /* Returns the position of the first byte in the sequence between 's' and
      * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found.
      * */
 
-    PERL_ARGS_ASSERT_FIND_SPAN_END;
-
-    assert(send >= s);
-
     if ((STRLEN) (send - s) >= PERL_WORDSIZE
                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                           - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
@@ -700,16 +699,15 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
 STATIC U8 *
 S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
 {
+    PERL_ARGS_ASSERT_FIND_NEXT_MASKED;
+    assert(send >= s);
+    assert((byte & mask) == byte);
+
     /* Returns the position of the first byte in the sequence between 's'
      * and 'send-1' inclusive that when ANDed with 'mask' yields 'byte';
      * returns 'send' if none found.  It uses word-level operations instead of
      * byte to speed up the process */
 
-    PERL_ARGS_ASSERT_FIND_NEXT_MASKED;
-
-    assert(send >= s);
-    assert((byte & mask) == byte);
-
 #ifndef EBCDIC
 
     if ((STRLEN) (send - s) >= PERL_WORDSIZE
@@ -778,17 +776,16 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
 STATIC U8 *
 S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
 {
+    PERL_ARGS_ASSERT_FIND_SPAN_END_MASK;
+    assert(send >= s);
+    assert((span_byte & mask) == span_byte);
+
     /* Returns the position of the first byte in the sequence between 's' and
      * 'send-1' inclusive that when ANDed with 'mask' isn't 'span_byte'.
      * 'span_byte' should have been ANDed with 'mask' in the call of this
      * function.  Returns 'send' if none found.  Works like find_span_end(),
      * except for the AND */
 
-    PERL_ARGS_ASSERT_FIND_SPAN_END_MASK;
-
-    assert(send >= s);
-    assert((span_byte & mask) == span_byte);
-
     if ((STRLEN) (send - s) >= PERL_WORDSIZE
                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                           - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
@@ -11793,10 +11790,6 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
     bool retval = true;
     SCX_enum * ret_script = NULL;
 
-    assert(send >= s);
-
-    PERL_ARGS_ASSERT_ISSCRIPT_RUN;
-
     /* All code points in 0..255 are either Common or Latin, so must be a
      * script run.  We can return immediately unless we need to know which
      * script it is. */
diff --git a/toke.c b/toke.c
@@ -1886,16 +1886,15 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
 STATIC void
 S_incline(pTHX_ const char *s, const char *end)
 {
+    PERL_ARGS_ASSERT_INCLINE;
+    assert(end >= s);
+
     const char *t;
     const char *n;
     const char *e;
     line_t line_num;
     UV uv;
 
-    PERL_ARGS_ASSERT_INCLINE;
-
-    assert(end >= s);
-
     COPLINE_INC_WITH_HERELINES;
     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
      && s+1 == PL_bufend && *s == ';') {
@@ -2855,6 +2854,10 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
                                           const bool is_utf8,
                                           const char ** error_msg)
 {
+    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+    assert(e >= s);
+    assert(s > (char *) 3);
+
     /* <s> points to first character of interior of \N{}, <e> to one beyond the
      * interior, hence to the "}".  Finds what the name resolves to, returning
      * an SV* containing it; NULL if no valid one found.
@@ -2875,12 +2878,6 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
     const char* context = s - 3;
     STRLEN context_len = e - context + 1; /* include all of \N{...} */
 
-
-    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
-
-    assert(e >= s);
-    assert(s > (char *) 3);
-
     while (s < e && isBLANK(*s)) {
         s++;
     }
diff --git a/utf8.c b/utf8.c
@@ -724,6 +724,10 @@ S_does_utf8_overflow(const U8 * const s, const U8 * e)
 STRLEN
 Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
 {
+    PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_;
+    assert(e > s);
+    assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
+
     SSize_t len, full_len;
 
     /* An internal helper function.
@@ -751,12 +755,6 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
      *  determined with just the first one or two bytes.
      *
      */
-
-    PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_;
-
-    assert(e > s);
-    assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
-
     full_len = UTF8SKIP(s);
 
     len = e - s;
@@ -841,6 +839,10 @@ Size_t
 Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e,
                         const bool require_partial)
 {
+    PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_;
+    assert(s0 < e);
+    assert(*s0 == I8_TO_NATIVE_UTF8(0xFF));
+
     /* This is called to determine if the UTF-8 sequence starting at s0 and
      * continuing for up to one full character of bytes, but looking no further
      * than 'e - 1', is legal.  *s0 must be 0xFF (or whatever the native
@@ -867,11 +869,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e,
     const U8 *s = s0 + 1;
     const U8 *send = e;
 
-    PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_;
-
-    assert(s0 < e);
-    assert(*s0 == I8_TO_NATIVE_UTF8(0xFF));
-
     send = s + MIN(UTF8_MAXBYTES - 1, e - s);
     while (s < send) {
         if (! UTF8_IS_CONTINUATION(*s)) {
@@ -4247,6 +4244,9 @@ STATIC UV
 S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e,
                         U8 * ustrp, STRLEN *lenp)
 {
+    PERL_ARGS_ASSERT_TURKIC_FC;
+    assert(e > p);
+
     /* Returns 0 if the foldcase of the input UTF-8 encoded sequence from
      * p0..e-1 according to Turkic rules is the same as for non-Turkic.
      * Otherwise, it returns the first code point of the Turkic foldcased
@@ -4257,9 +4257,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e,
      * I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER
      * DOTLESS I */
 
-    PERL_ARGS_ASSERT_TURKIC_FC;
-    assert(e > p);
-
     if (UNLIKELY(*p == 'I')) {
         *lenp = 2;
         ustrp[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
@@ -4282,15 +4279,15 @@ STATIC UV
 S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e,
                         U8 * ustrp, STRLEN *lenp)
 {
+    PERL_ARGS_ASSERT_TURKIC_LC;
+    assert(e > p0);
+
     /* Returns 0 if the lowercase of the input UTF-8 encoded sequence from
      * p0..e-1 according to Turkic rules is the same as for non-Turkic.
      * Otherwise, it returns the first code point of the Turkic lowercased
      * sequence, and the entire sequence will be stored in *ustrp.  ustrp will
      * contain *lenp bytes */
 
-    PERL_ARGS_ASSERT_TURKIC_LC;
-    assert(e > p0);
-
     /* A 'I' requires context as to what to do */
     if (UNLIKELY(*p0 == 'I')) {
         const U8 * p = p0 + 1;
@@ -4328,6 +4325,9 @@ STATIC UV
 S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
                         U8 * ustrp, STRLEN *lenp)
 {
+    PERL_ARGS_ASSERT_TURKIC_UC;
+    assert(e > p);
+
     /* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence
      * from p0..e-1 according to Turkic rules is the same as for non-Turkic.
      * Otherwise, it returns the first code point of the Turkic upper or
@@ -4338,9 +4338,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
      * I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER
      * DOTLESS I */
 
-    PERL_ARGS_ASSERT_TURKIC_UC;
-    assert(e > p);
-
     if (*p == 'i') {
         *lenp = 2;
         ustrp[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
diff --git a/util.c b/util.c
@@ -612,13 +612,13 @@ Perl_delimcpy_no_escape(char *to, const char *to_end,
                         const char *from, const char *from_end,
                         const int delim, I32 *retlen)
 {
+    PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE;
+
     const char * delim_pos;
     ptrdiff_t from_len = from_end - from;
     ptrdiff_t to_len = to_end - to;
     SSize_t copy_len;
 
-    PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE;
-
     assert(from_len >= 0);
     assert(to_len >= 0);
 
@@ -717,14 +717,14 @@ Perl_delimcpy(char *to, const char *to_end,
               const char *from, const char *from_end,
               const int delim, I32 *retlen)
 {
-    const char * const orig_to = to;
-    ptrdiff_t copy_len = 0;
-    bool stopped_early = FALSE;     /* Ran out of room to copy to */
-
     PERL_ARGS_ASSERT_DELIMCPY;
     assert(from_end >= from);
     assert(to_end >= to);
 
+    const char * const orig_to = to;
+    ptrdiff_t copy_len = 0;
+    bool stopped_early = FALSE;     /* Ran out of room to copy to */
+
     /* Don't use the loop for the trivial case of the first character being the
      * delimiter; otherwise would have to worry inside the loop about backing
      * up before the start of 'from' */
@@ -1099,6 +1099,9 @@ a littlestr of "ab\n", SvTAIL matches as:
 char *
 Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U32 flags)
 {
+    PERL_ARGS_ASSERT_FBM_INSTR;
+    assert(bigend >= big);
+
     unsigned char *s;
     STRLEN l;
     const unsigned char *little = (const unsigned char *)SvPV_const(littlestr,l);
@@ -1107,10 +1110,6 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U
     bool valid = SvVALID(littlestr);
     bool tail = valid ? cBOOL(SvTAIL(littlestr)) : FALSE;
 
-    PERL_ARGS_ASSERT_FBM_INSTR;
-
-    assert(bigend >= big);
-
     if ((STRLEN)(bigend - big) < littlelen) {
         if (     tail
              && ((STRLEN)(bigend - big) == littlelen - 1)