From f29af088018e0e5cb817d1f1eb4caafba9b06e3f Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Tue, 19 Aug 2025 07:36:42 +0200 Subject: [PATCH 1/6] porting/diag.t: fix some corner cases Before this patch ================= The porting/diag.t test tries to make sure every error message emitted by perl is documented in pod/perldiag.pod. The way it does this is as follows: 1. Collect list of diagnostic functions by taking all known functions whose name contains warn, err, die, croak, or deprecate. Add Perl_mess and anything that starts with PERL_DIAG_. Add the special (v)FAIL(2,3,4) macros used in regcomp.c. 2. For every input file, scan it line by line. 3. If the line starts with "#", skip it (so we don't find macro definitions like "#define croak(...) ..." themselves). 4. If the line contains the name of a diagnostic function, collect the line. 5. If the function name is followed by a simple argument list (basically "(...)" with at most one level of nested parentheses within), stop. Otherwise keep collecting lines until we find a line that ends with ");", which we take to be the end of the statement. 6. Join all collected lines into one blob of text and rescan it for the diagnostic message. 7. Read the next line and repeat (step 3). There are some issues with this approach. For example, step 5 may end up eating large chunks of the input file until a ");" line is encountered. If we're in a header that "#define"s many macros in a row, all of them will be swallowed here. Also, while "#define" lines themselves are skipped, the body of the macro may not be: If the "#define" line ends with a backslash, the rest of the macro definition (placed on the following lines) will be scanned as normal. Finally, there is a bug in the "simple argument list" check in step 5: It doesn't handle a completely empty argument list (as in "croak_memory_wrap()", for example), so it starts collecting lines until it finds the next ");". After this patch ================ Extend step 3: If the line is a definition of one of the regcomp error macros (e.g. "#define FAIL2(...)"), skip the entire definition, including backslash continuation lines. These macros are just wrappers around croak; they don't emit any particular diagnostics of their own. Extend step 5: Handle empty argument lists so "croak_memory_wrap()" does not start a runaway scan for ");". Extend step 5: When collecting lines, don't just stop at ");", but also at (or rather just before) lines starting with "#", so we don't blindly swallow preprocessor directives. If an error message is not found in perldiag, report the source location where it is emitted with the number of the first line of its collected chunk (i.e. the location of the diagnostic function name), not (as before) the last line of the chunk (i.e. the location of the next ");", which may be a long ways off). This exposes one new message in regcomp_internal.h, which has been added to perldiag. --- pod/perldiag.pod | 6 ++++++ t/porting/diag.t | 46 +++++++++++++++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 1ecf2c43e54f..52098043aed6 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -5228,6 +5228,12 @@ was string. (P) The compiler attempted to do a goto, or something weird like that. +=item panic! %s: %d: Tried to warn when none expected at '%s' + +(P) The regex engine tried to emit a warning for a temporary regex that it +generated internally. This should never happen because any regex thus generated +should be correct and free of warnings. + =item panic: unexpected constant lvalue entersub entry via type/targ %d:%d (P) When compiling a subroutine call in lvalue context, Perl failed an diff --git a/t/porting/diag.t b/t/porting/diag.t index 742f9a2d9aee..e4502b6c9045 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -12,7 +12,7 @@ use TestInit qw(T); # T is chdir to the top level use warnings; use strict; use Config; -use Data::Dumper; +#use Data::Dumper; require './t/test.pl'; if ( $Config{usecrosscompile} ) { @@ -56,9 +56,9 @@ push @functions, 'Perl_mess'; @functions = sort { length($b) <=> length($a) || $a cmp $b } @functions; push @functions, 'PERL_DIAG_(?\w+)'; -my $regcomp_fail_re = '\b(?:(?:Simple_)?v)?FAIL[2-4]?(?:utf8f)?\b'; +my $regcomp_func_re = '\b(?:(?:Simple_)?v)?FAIL[2-4]?(?:utf8f)?\b'; my $regcomp_re = - "(?ckWARN(?:\\d+)?reg\\w*|vWARN\\d+|$regcomp_fail_re)"; + "(?ckWARN(?:\\d+)?reg\\w*|vWARN\\d+|$regcomp_func_re)"; my $function_re = join '|', @functions; my $source_msg_re = "(?\\bDIE\\b|$function_re)"; @@ -69,7 +69,7 @@ my $source_msg_call_re = qr/$source_msg_re(?:_nocontext)? \s* (?:(?WARN_DEPRECATED__\w+)\s*,(?:\s*(?"[^"]+")\s*,)?)? \s* $text_re /x; my $bad_version_re = qr{BADVERSION\([^"]*$text_re}; - $regcomp_fail_re = qr/$regcomp_fail_re\([^"]*$text_re/; +my $regcomp_fail_re = qr/$regcomp_func_re\([^"]*$text_re/; my $regcomp_call_re = qr/$regcomp_re.*?$text_re/; my %entries; @@ -332,20 +332,36 @@ sub check_file { } } } - next if /^#/; + if (/^#/) { + if (/^#\h*define\h+$regcomp_func_re/) { + # skip over entire definition of (v)FAIL macro + while (defined && m!\\$!) { + $_ = <$codefh>; + } + } + next; + } my $multiline = 0; # Loop to accumulate the message text all on one line. - if (m/(?!^)\b(?:$source_msg_re(?:_nocontext)?|$regcomp_re)\s*\((?(?:[^()]+|\([^()]+\))+\))?/ + if (m/(?!^)\b(?:$source_msg_re(?:_nocontext)?|$regcomp_re)\s*\((?(?:[^()]|\([^()]*\))*\))?/ and !$+{tail} ) { while (not m/\);\s*$/) { - my $nextline = <$codefh>; + my $nextchar = getc $codefh; # Means we fell off the end of the file. Not terribly surprising; # this code tries to merge a lot of things that aren't regular C # code (preprocessor stuff, long comments). That's OK; we don't # need those anyway. - last if not defined $nextline; + last if not defined $nextchar; + + # If the next line is a preprocessor directive (starts with '#'), we + # stop without consuming it and let the next iteration of the outer + # loop handle it. + $codefh->ungetc(ord $nextchar); + last if $nextchar eq '#'; + + my $nextline = readline $codefh; chomp $nextline; $nextline =~ s/^\s+//; $_ =~ s/\\$//; @@ -475,12 +491,12 @@ sub check_file { next if $name=~/\[TESTING\]/; # ignore these as they are works in progress - check_message(standardize($name),$codefn,$severity,$categories); + check_message(standardize($name),$codefn,$first_line,$severity,$categories); } } sub check_message { - my($name,$codefn,$severity,$categories,$partial) = @_; + my($name,$codefn,$lineno,$severity,$categories,$partial) = @_; my $key = $name =~ y/\n/ /r; my $ret; @@ -511,7 +527,7 @@ sub check_message { # There is no listing, but it is in the list of exceptions. TODO FAIL. fail($key); diag( - " Message '$name'\n from $codefn line $. is not listed in $pod\n". + " Message '$name'\n from $codefn line $lineno is not listed in $pod\n". " (but it wasn't documented in 5.10 either, so marking it TODO)." ); } @@ -538,7 +554,7 @@ sub check_message { like($entries{$key}{severity}, $qr, ($severity =~ /\[/ ? "severity is one of $severity" - : "severity is $severity") . " for '$name' at $codefn line $.$pod_line") + : "severity is $severity") . " for '$name' at $codefn line $lineno$pod_line") or do { if ($severity=~/D/ and $entries{$key}{severity}=~/W/) { diag("You should change W to D if this is a deprecation"); @@ -547,7 +563,7 @@ sub check_message { is($entries{$key}{category}, $categories, ($categories ? "categories are [$categories]" : "no category") - . " for '$name' at $codefn line $.$pod_line"); + . " for '$name' at $codefn line $lineno$pod_line"); } } elsif ($partial) { # noop @@ -555,7 +571,7 @@ sub check_message { my $ok; if ($name =~ /\n/) { $ok = 1; - check_message($_,$codefn,$severity,$categories,1) or $ok = 0, last + check_message($_,$codefn,$lineno,$severity,$categories,1) or $ok = 0, last for split /\n/, $name; } if ($ok) { @@ -571,7 +587,7 @@ sub check_message { # No listing found, and no excuse either. # Find the correct place in perldiag.pod, and add a stanza beginning =item $name. fail($name); - diag(" Message '$name'\n from $codefn line $. is not listed in $pod"); + diag(" Message '$name'\n from $codefn line $lineno is not listed in $pod"); } # seen it, so only fail once for this message $entries{$name}{seen}++; From bcdfea78c2288cec8e8a306e064e690e1a64a673 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Mon, 18 Aug 2025 22:21:01 +0200 Subject: [PATCH 2/6] fix potential MEM_SIZE overflow in expected_size() Coverity says: CID 584099: Integer handling issues (INTEGER_OVERFLOW) Expression "newlen + 8UL", where "newlen" is known to be equal to 18446744073709551615, overflows the type of "newlen + 8UL", which is type "unsigned long". (Referring to (n) + PTRSIZE - 1 where n = newlen and PTRSIZE = 8UL.) Crudely avoid the issue by checking n for overflow beforehand and dying with a "panic: memory wrap" error if so. --- perl.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/perl.h b/perl.h index 844f03fe141b..b4b96f1adad3 100644 --- a/perl.h +++ b/perl.h @@ -1700,8 +1700,15 @@ Use L to declare variables of the maximum usable size on this platform. * multiple of PTRSIZE, for a minimum of PERL_STRLEN_NEW_MIN. This is * not entirely useless, just not terribly accurate. */ -#define expected_size(n) ( ((n) > PERL_STRLEN_NEW_MIN) \ - ? (((n) + PTRSIZE - 1) & ~(PTRSIZE - 1)) \ +#define expected_size(n) ( ((n) > PERL_STRLEN_NEW_MIN) \ + ? ( \ + (void)( \ + (MEM_SIZE)(n) > MEM_SIZE_MAX - (PTRSIZE - 1) \ + ? (croak_memory_wrap(), 0) \ + : 0 \ + ), \ + ((MEM_SIZE)(n) + (PTRSIZE - 1)) & ~(MEM_SIZE)(PTRSIZE - 1) \ + ) \ : PERL_STRLEN_NEW_MIN ) /* This use of offsetof() requires /Zc:offsetof- for VS2017 (and presumably From 137855da8d23f77e5923547ea151eb9b5de06b8c Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Mon, 18 Aug 2025 22:18:13 +0200 Subject: [PATCH 3/6] fix potential IV overflow in do_sv_dump() Coverity says: CID 584102: Insecure data handling (INTEGER_OVERFLOW) The cast of "S_sequence_num(my_perl, ((XPVCV *)({...; p_;}))->xcv_start_u.xcv_start)" to a signed type could result in a negative number. Avoid the issue by taking the UV returned by sequence_num and printing it directly (without going through IV conversion). --- dump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dump.c b/dump.c index efddad3f1652..c2f8310bf605 100644 --- a/dump.c +++ b/dump.c @@ -2765,9 +2765,9 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo PTR2UV(CvSTART(sv))); else Perl_dump_indent(aTHX_ level, file, - " START = 0x%" UVxf " ===> %" IVdf "\n", + " START = 0x%" UVxf " ===> %" UVuf "\n", PTR2UV(CvSTART(sv)), - (IV)sequence_num(CvSTART(sv))); + sequence_num(CvSTART(sv))); } Perl_dump_indent(aTHX_ level, file, " ROOT = 0x%" UVxf "\n", PTR2UV(CvROOT(sv))); From d7c56017cc3d1d66354aad92ca6386b98730a9ae Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Mon, 18 Aug 2025 22:29:35 +0200 Subject: [PATCH 4/6] don't quite fix potential I32 overflow in S_run_user_filter Coverity says: CID 584101: Integer handling issues (INTEGER_OVERFLOW) Expression "status", where "Perl_SvIV(my_perl, out)" is known to be equal to 0, overflows the type of "status", which is type "int". What it really means is that doing 'int status = SvIV(...)' may overflow (since IV can be (and on 64-bit systems usually is) wider than int). This patch doesn't fix that issue. However, it avoids yet another type in the mix: S_run_user_filter() is declared as returning I32, not int, and elsewhere assigns filter_read(...) (also an I32) to 'result'. So instead of worrying about (overflowing) conversions between IV, int, and I32, make 'result' an I32 and only worry about (overflowing) conversions between IV and I32. --- pp_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pp_ctl.c b/pp_ctl.c index bd83bd2fbd69..466cd6269bf2 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -6852,7 +6852,7 @@ S_run_user_filter(pTHX_ int idx, SV *buf_sv, int maxlen) const int filter_has_file = IoLINES(datasv); SV * const filter_state = MUTABLE_SV(IoTOP_GV(datasv)); SV * const filter_sub = MUTABLE_SV(IoBOTTOM_GV(datasv)); - int status = 0; + I32 status = 0; SV *upstream; STRLEN got_len; char *got_p = NULL; From 2aadb9ce1ea6292a31682b18d2442bca7e1a6ca5 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Mon, 18 Aug 2025 22:35:43 +0200 Subject: [PATCH 5/6] pp_sort: avoid potential I32 overflow from the comparator Coverity says: CID 584092: Integer handling issues (INTEGER_OVERFLOW) Expression "result", where "Perl_SvIV(my_perl, *my_perl->Istack_sp)" is known to be equal to 0, overflows the type of "result", which is type "I32". The sort comparison function returns IV (a Perl integer), but all the sorting routines in this file want to work with I32. Instead of converting (and possibly truncating) the value directly, normalize the result to -1/0/1, which fits in any integer type. --- pp_sort.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pp_sort.c b/pp_sort.c index a3dc19e89792..de40b659e440 100644 --- a/pp_sort.c +++ b/pp_sort.c @@ -1167,7 +1167,6 @@ static I32 S_sortcv(pTHX_ SV *const a, SV *const b) { const I32 oldsaveix = PL_savestack_ix; - I32 result; PMOP * const pm = PL_curpm; COP * const cop = PL_curcop; SV *olda, *oldb; @@ -1191,7 +1190,11 @@ S_sortcv(pTHX_ SV *const a, SV *const b) /* entry zero of a stack is always PL_sv_undef, which * simplifies converting a '()' return into undef in scalar context */ assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef); - result = SvIV(*PL_stack_sp); + const IV iv = SvIV(*PL_stack_sp); + const I32 result = + iv > 0 ? 1 : + iv < 0 ? -1 : + 0; rpp_popfree_to_NN(PL_stack_base); LEAVE_SCOPE(oldsaveix); @@ -1206,7 +1209,6 @@ static I32 S_sortcv_stacked(pTHX_ SV *const a, SV *const b) { const I32 oldsaveix = PL_savestack_ix; - I32 result; AV * const av = GvAV(PL_defgv); PMOP * const pm = PL_curpm; COP * const cop = PL_curcop; @@ -1256,7 +1258,11 @@ S_sortcv_stacked(pTHX_ SV *const a, SV *const b) /* entry zero of a stack is always PL_sv_undef, which * simplifies converting a '()' return into undef in scalar context */ assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef); - result = SvIV(*PL_stack_sp); + const IV iv = SvIV(*PL_stack_sp); + const I32 result = + iv > 0 ? 1 : + iv < 0 ? -1 : + 0; rpp_popfree_to_NN(PL_stack_base); LEAVE_SCOPE(oldsaveix); @@ -1273,7 +1279,6 @@ S_sortcv_xsub(pTHX_ SV *const a, SV *const b) { const I32 oldsaveix = PL_savestack_ix; CV * const cv=MUTABLE_CV(PL_sortcop); - I32 result; PMOP * const pm = PL_curpm; PERL_ARGS_ASSERT_SORTCV_XSUB; @@ -1291,7 +1296,11 @@ S_sortcv_xsub(pTHX_ SV *const a, SV *const b) /* entry zero of a stack is always PL_sv_undef, which * simplifies converting a '()' return into undef in scalar context */ assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef); - result = SvIV(*PL_stack_sp); + const IV iv = SvIV(*PL_stack_sp); + const I32 result = + iv > 0 ? 1 : + iv < 0 ? -1 : + 0; rpp_popfree_to_NN(PL_stack_base); LEAVE_SCOPE(oldsaveix); From 11e1d23bf83081b8f6c9ad00ce7203c94d388981 Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Mon, 18 Aug 2025 22:39:53 +0200 Subject: [PATCH 6/6] pp_sleep: fix potential I32 overflow Coverity says: CID 584095: Integer handling issues (INTEGER_OVERFLOW) Expression "duration", where "(IV)Perl_SvIV(my_perl, *sp--)" is known to be equal to 0, overflows the type of "duration", which is type "I32 const". There are two dodgy type conversions in this function: from IV (POPi) to I32 (duration), and from I32 (duration) to unsigned int (sleep argument, by cast). Avoid the one Coverity complains about by making 'duration' an IV. --- pp_sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pp_sys.c b/pp_sys.c index 881fc7baa497..9a47afd82bc3 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -5123,7 +5123,7 @@ PP_wrapped(pp_sleep, MAXARG, 0) if (MAXARG < 1 || (!TOPs && !POPs)) PerlProc_pause(); else { - const I32 duration = POPi; + const IV duration = POPi; if (duration < 0) { /* diag_listed_as: %s() with negative argument */ ck_warner_d(packWARN(WARN_MISC),