diff --git a/embed.fnc b/embed.fnc index fbe1aef0179a..e509cd39b5e7 100644 --- a/embed.fnc +++ b/embed.fnc @@ -35,7 +35,7 @@ : Supported at least since perl-5.23.8, with or without ppport.h. : : Lines in this file are of the form: -: flags|return_type|name|arg1|arg2|...|argN +: flags|return_type|name|arg1|arg2|...|argN ( assert(...) )* : : 'flags' is a string of single letters. Most of the flags are meaningful only : to embed.pl; some only to autodoc.pl, and others only to makedef.pl. The @@ -46,6 +46,9 @@ : A line may be continued onto the next by ending it with a backslash. : Leading and trailing whitespace will be ignored in each component. : +: The optional list of asserts is used to customize the generated +: PERL_ARGS_ASSERT macro. See AUTOMATIC PARAMETER SANITY CHECKING below +: : Most entries here have a macro created with the entry name. This presents : name space collision potentials which haven't been well thought out, but are : now documented here. In practice this has rarely been an issue. At least, @@ -172,6 +175,14 @@ : may not be of the correct type. As already mentioned, NOCHECK : suppresses this check. : +: You can specify your own checking beyond these by adding any number of +: assert() calls to any given entry after its final argument. Whatever you +: specify will be added to the ARGS_ASSERT macro for the entry in the order +: you've specified, and after all the assertions that already have been +: described in this section. When adding yours, weigh that doing it here +: will make it less visible to a maintainer than keeping it in the function +: it applies to +: : Currently, it is optional to include an empty ARGS_ASSERT macro in your : functions. But a porting test enforces that a non-empty one is included. : The call should be at the top of your function so that the sanity checks @@ -442,13 +453,6 @@ : flag even on a format function is if the format would generate error: : format string argument is not a string type : -: 'G' Suppress empty PERL_ARGS_ASSERT_foo macro. Normally such a macro is -: generated for all entries for functions 'foo' in this file. The macro -: is empty unless regen/embed.pl deems that there should be assert() -: calls to verify the sanity of some or all of foo's arguments. -: -: proto.h: An empty PERL_ARGS_ASSERT macro is not defined -: : 'h' Hide any documentation that would normally go into perlapi or : perlintern. This is typically used when the documentation is actually : in another pod. If you don't use the 'h', that documentation is @@ -750,7 +754,8 @@ ARdp |SSize_t|av_len |NN AV *av ARdp |AV * |av_make |SSize_t size \ |NN SV **strp CRdip |AV * |av_new_alloc |SSize_t size \ - |bool zeroflag + |bool zeroflag \ + assert(size > 0) p |SV * |av_nonelem |NN AV *av \ |SSize_t ix Adp |SV * |av_pop |NN AV *av @@ -2410,7 +2415,7 @@ p |void |no_bareword_filehandle \ |NN const char *fhname Tefprv |void |noperl_die |NN const char *pat \ |... -CGTdp |void |noshutdownhook +CTdp |void |noshutdownhook Adp |int |nothreadhook p |void |notify_parser_that_encoding_changed : Used in perly.y @@ -4524,11 +4529,12 @@ op |void |sv_add_backref |NN SV * const tsv \ |NN SV * const sv #endif #if defined(PERL_IN_GV_C) || defined(PERL_IN_UNIVERSAL_C) -EGdp |HV * |gv_stashsvpvn_cached \ - |SV *namesv \ - |const char *name \ +Edp |HV * |gv_stashsvpvn_cached \ + |NULLOK SV *namesv \ + |NULLOK const char *name \ |U32 namelen \ - |I32 flags + |I32 flags \ + assert(namesv || name) #endif #if defined(PERL_IN_HV_C) Sx |void |clear_placeholders \ @@ -6107,17 +6113,19 @@ S |I32 |utf16_textfilter \ # endif #endif /* defined(PERL_IN_TOKE_C) */ #if defined(PERL_IN_UNIVERSAL_C) -GS |bool |isa_lookup |NULLOK NOCHECK HV *stash \ +S |bool |isa_lookup |NN HV *stash \ |NULLOK SV *namesv \ |NULLOK const char *name \ |STRLEN len \ - |U32 flags -GS |bool |sv_derived_from_svpvn \ - |NULLOK SV *sv \ + |U32 flags \ + assert(namesv || name) +S |bool |sv_derived_from_svpvn \ + |NN SV *sv \ |NULLOK SV *namesv \ |NULLOK const char *name \ |const STRLEN len \ - |U32 flags + |U32 flags \ + assert(namesv || name) #endif #if defined(PERL_IN_UTF8_C) RS |UV |check_locale_boundary_crossing \ diff --git a/gv.c b/gv.c index a5960c82f853..e494bc30969b 100644 --- a/gv.c +++ b/gv.c @@ -1691,17 +1691,14 @@ reasons. =cut */ -#define PERL_ARGS_ASSERT_GV_STASHSVPVN_CACHED \ - assert(namesv || name) - HV* Perl_gv_stashsvpvn_cached(pTHX_ SV *namesv, const char *name, U32 namelen, I32 flags) { + PERL_ARGS_ASSERT_GV_STASHSVPVN_CACHED; + HV* stash; HE* he; - PERL_ARGS_ASSERT_GV_STASHSVPVN_CACHED; - he = (HE *)hv_common( PL_stashcache, namesv, name, namelen, (flags & SVf_UTF8) ? HVhek_UTF8 : 0, 0, NULL, 0 diff --git a/inline.h b/inline.h index 40d069324b64..3d6c3e0d3ec5 100644 --- a/inline.h +++ b/inline.h @@ -191,10 +191,10 @@ initialized. PERL_STATIC_INLINE AV * Perl_av_new_alloc(pTHX_ SSize_t size, bool zeroflag) { + PERL_ARGS_ASSERT_AV_NEW_ALLOC; + AV * const av = newAV(); SV** ary; - PERL_ARGS_ASSERT_AV_NEW_ALLOC; - assert(size > 0); Newx(ary, size, SV*); /* Newx performs the memwrap check */ AvALLOC(av) = ary; diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 855bfe34da52..10f1b8e6be1a 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -423,6 +423,19 @@ memory saving is unrealistic. C now substitutes the Unicode REPLACEMENT CHARACTER for malformed input. Previously it used the NUL character. +=item * + +For core Perl maintainers, the syntax of F has been extended. +Every function, C, named in this file has generated for it a +macro named C. The macro expands to C +calls that do basic sanity checking for each argument to C that +we currently deem as being appropriate to have such checking. (That +means that many arguments are not checked, and that the generated macro +may currently expand to nothing.) With this release, you can add +C statements yourself to F that will be +incorporated into the generated macro, beyond the system-generated ones. +Comments and examples in F give details. + =back =head1 Selected Bug Fixes diff --git a/proto.h b/proto.h index 6e4ae660c7bf..623ea2dddbde 100644 --- a/proto.h +++ b/proto.h @@ -3281,6 +3281,7 @@ Perl_noperl_die(const char *pat, ...) PERL_CALLCONV void Perl_noshutdownhook(void); +#define PERL_ARGS_ASSERT_NOSHUTDOWNHOOK PERL_CALLCONV int Perl_nothreadhook(pTHX); @@ -3758,7 +3759,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, const char * const strbeg, assert(rx); assert(strbeg); assert(strpos); assert(strend) PERL_CALLCONV SV * -Perl_re_intuit_string(pTHX_ REGEXP * const r); +Perl_re_intuit_string(pTHX_ REGEXP * const r); #define PERL_ARGS_ASSERT_RE_INTUIT_STRING \ assert(r) @@ -6944,6 +6945,8 @@ Perl_sv_add_backref(pTHX_ SV * const tsv, SV * const sv) PERL_CALLCONV HV * Perl_gv_stashsvpvn_cached(pTHX_ SV *namesv, const char *name, U32 namelen, I32 flags) __attribute__visibility__("hidden"); +# define PERL_ARGS_ASSERT_GV_STASHSVPVN_CACHED \ + assert(namesv || name) #endif #if defined(PERL_IN_HV_C) @@ -9464,11 +9467,15 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen); #if defined(PERL_IN_UNIVERSAL_C) STATIC bool S_isa_lookup(pTHX_ HV *stash, SV *namesv, const char *name, STRLEN len, U32 flags); +# define PERL_ARGS_ASSERT_ISA_LOOKUP \ + assert(stash); assert(SvTYPE(stash) == SVt_PVHV); assert(namesv || name) STATIC bool S_sv_derived_from_svpvn(pTHX_ SV *sv, SV *namesv, const char *name, const STRLEN len, U32 flags); +# define PERL_ARGS_ASSERT_SV_DERIVED_FROM_SVPVN \ + assert(sv); assert(namesv || name) -#endif +#endif /* defined(PERL_IN_UNIVERSAL_C) */ #if defined(PERL_IN_UTF8_C) STATIC UV S__to_utf8_case(pTHX_ const UV original, const U8 *p, U8 *ustrp, STRLEN *lenp, SV *invlist, const I32 * const invmap, const U32 * const * const aux_tables, const U8 * const aux_table_lengths, const char * const normal); @@ -9758,7 +9765,8 @@ Perl_av_fetch_simple(pTHX_ AV *av, SSize_t key, I32 lval) PERL_STATIC_INLINE AV * Perl_av_new_alloc(pTHX_ SSize_t size, bool zeroflag) __attribute__warn_unused_result__; -# define PERL_ARGS_ASSERT_AV_NEW_ALLOC +# define PERL_ARGS_ASSERT_AV_NEW_ALLOC \ + assert(size > 0) PERL_STATIC_INLINE void Perl_av_push_simple(pTHX_ AV *av, SV *val); diff --git a/regen/HeaderParser.pm b/regen/HeaderParser.pm index 98281f4e6ab0..ab0f4ce00aad 100644 --- a/regen/HeaderParser.pm +++ b/regen/HeaderParser.pm @@ -737,46 +737,94 @@ sub lines { $_[0]->{lines} } sub tidy_embed_fnc_entry { my ($self, $line_data)= @_; my $line= $line_data->{line}; - return $line if $line =~ /^\s*:/; - return $line unless $line_data->{type} eq "content"; - return $line unless $line =~ /\|/; - $line =~ s/\s*\\\n/ /g; - $line =~ s/\s+\z//; - ($line)= expand($line); + return $line if $line =~ /^\s*:/; # Don't tidy comments + return $line unless $line_data->{type} eq "content"; # Nor #if-like + return $line unless $line =~ /\|/; # Nor non-entries + + $line =~ s/\s*\\\n/ /g; # Embedded \n to blank + $line =~ s/\s+\z//; # No trailing white space + ($line)= expand($line); # No tabs + + # Remove any assertions, and save them. This must be done before the + # split because the assertions can contain '|' + $line =~ s/ \b ( assert \s* \( .* ) \z //x; + my $assertions = $1; + + # Split into fields my ($flags, $ret, $name, @args)= split /\s*\|\s*/, $line; + + # Sort and remove duplicate flags. Alpha flags are sorted first my %flag_seen; - $flags= join "", grep !$flag_seen{$_}++, sort split //, $flags; - if ($flags =~ s/^#//) { - $flags .= "#"; - } - if ($flags eq "#") { + $flags = join "", grep !$flag_seen{$_}++, + sort { + my $a_is_word = $a =~ /\w/; + my $b_is_word = $b =~ /\w/; + return $a cmp $b if $a_is_word == $b_is_word; + return -1 if $a_is_word; + return 1; + } split //, $flags; + + if ($flags eq "#") { # Could be an attempt at a conditional die "Not allowed to use only '#' for flags" . "in 'embed.fnc' at line $line_data->{start_line_num}"; } + if (!$flags) { die "Missing flags in function definition" . " in 'embed.fnc' at line $line_data->{start_line_num}\n" . "Did you a forget a line continuation on the previous line?\n"; } + + # Normalize the return type and arguments for ($ret, @args) { s/(\w)\*/$1 */g; s/\*\s+(\w)/*$1/g; s/\*const/* const/g; } + + # Start the output; right justify my $head= sprintf "%-8s|%-7s", $flags, $ret; $head .= sprintf "|%*s", -(31 - length($head)), $name; + + # Start first argument on next line if $head already extends too far to + # the right if (@args and length($head) > 32) { $head .= "\\\n"; $head .= " " x 32; } + + # Add each argument on a separate line foreach my $ix (0 .. $#args) { my $arg= $args[$ix]; $head .= "|$arg"; $head .= "\\\n" . (" " x 32) if $ix < $#args; } + + my @assertions; + if ($assertions) { + # Put each assertion into a separate array element + @assertions = split / \s* assert \s* \( /x, $assertions; + shift @assertions; # The split leaves an empty first element + + # Trim each assertion, including any trailing semicolon + foreach my $this_assertion (@assertions) { + $this_assertion =~ s/ ^ \s+ //x; + $this_assertion =~ s/ \s+ \z //x; + $this_assertion =~ s/ ; \z //x; + + # Restore split delimitter + $this_assertion = "assert($this_assertion"; + + # Each assertion is on a separate line (for now, anyway) + $head .= "\\\n" . (" " x 32); + $head .= $this_assertion; + } + } + $line= $head . "\n"; + # Make all lines in this entry the same length; minimum 72 if ($line =~ /\\\n/) { my @lines= split /\s*\\\n/, $line; my $len= length($lines[0]); @@ -787,15 +835,18 @@ sub tidy_embed_fnc_entry { (map { sprintf "%*s", -$len, $_ } @lines[ 0 .. $#lines - 1 ]), $lines[-1]); } - ($line)= unexpand($line); + + ($line)= unexpand($line); # Back to using tabs $line_data->{embed}= EmbedLine->new( flags => $flags, return_type => $ret, name => $name, args => \@args, + assertions => \@assertions, start_line_num => $line_data->{start_line_num}, ); + $line =~ s/\s+\z/\n/; $line_data->{line}= $line; return $line; diff --git a/regen/embed.pl b/regen/embed.pl index 1033420b89d8..6a1aa645bc0b 100755 --- a/regen/embed.pl +++ b/regen/embed.pl @@ -121,13 +121,14 @@ sub generate_proto_h { $ind .= " " x ($level-1) if $level>1; my $inner_ind= $ind ? " " : " "; - my ($flags,$retval,$plain_func,$args) = @{$embed}{qw(flags return_type name args)}; - if ($flags =~ / ( [^ AabCDdEefFGhIiMmNnOoPpRrSsTUuvWXx;] ) /xx) { + my ($flags, $retval, $plain_func, $args, $assertions ) = + @{$embed}{qw(flags return_type name args assertions)}; + if ($flags =~ / ( [^ AabCDdEefFhIiMmNnOoPpRrSsTUuvWXx;] ) /xx) { die_at_end "flag $1 is not legal (for function $plain_func)"; } my @nonnull; - my $args_assert_line = ( $flags !~ /[Gm]/ ); + my $args_assert_line = ( $flags !~ /m/ ); my $has_depth = ( $flags =~ /W/ ); my $has_context = ( $flags !~ /T/ ); my $never_returns = ( $flags =~ /r/ ); @@ -242,6 +243,7 @@ sub generate_proto_h { my $n; for my $arg ( @$args ) { ++$n; + if ($arg =~ / ^ " (.+) " $ /x) { # Handle literal string my $name = $1; @@ -254,48 +256,66 @@ sub generate_proto_h { die_at_end 'm flag required for "literal" argument' unless $has_mflag; } - elsif ( $args_assert_line - && $arg =~ /\*/ - && $arg !~ /\b(NN|NULLOK)\b/ ) - { - warn "$func: $arg needs NN or NULLOK\n"; - ++$unflagged_pointers; - } - - my $nn = ( $arg =~ s/\s*\bNN\b\s+// ); - my $nz = ( $arg =~ s/\s*\bNZ\b\s+// ); - my $nullok = ( $arg =~ s/\s*\bNULLOK\b\s+// ); - my $nocheck = ( $arg =~ s/\s*\bNOCHECK\b\s+// ); + else { + my $nn = ( $arg =~ s/\bNN\b// ); + my $nz = ( $arg =~ s/\bNZ\b// ); + my $nullok = ( $arg =~ s/\bNULLOK\b// ); + my $nocheck = ( $arg =~ s/\bNOCHECK\b// ); + + # Trim $arg and remove multiple blanks + $arg =~ s/^\s+//; + $arg =~ s/\s+$//; + $arg =~ s/\s{2,}/ /g; + + die_at_end ":$func: $arg Use only one of NN, NULLOK, and NZ" + if 0 + $nn + $nz + $nullok > 1; + + push( @nonnull, $n ) if $nn; + + # A non-pointer shouldn't have a pointer-related modifier. + # But typedefs may be pointers without our knowing it, so + # we can't check for non-pointer issues. We can only + # check for the case where the argument is definitely a + # pointer. + if ($args_assert_line && $arg =~ /\*/) { + if ($nn + $nullok == 0) { + warn "$func: $arg needs NN or NULLOK\n"; + ++$unflagged_pointers; + } + + warn "$func: $arg should not have NZ\n" if $nz; + } - push( @nonnull, $n ) if $nn; + push( @nonnull, $n ) if $nn; - # Make sure each arg has at least a type and a var name. - # An arg of "int" is valid C, but want it to be "int foo". - my $argtype = ( $arg =~ m/^(\w+(?:\s*\*+)?)/ )[0]; - defined $argtype and $argtype =~ s/\s+//g; + # Make sure each arg has at least a type and a var name. + # An arg of "int" is valid C, but want it to be "int foo". + my $argtype = ( $arg =~ m/^(\w+(?:\s*\*+)?)/ )[0]; + defined $argtype and $argtype =~ s/\s+//g; - my $temp_arg = $arg; - $temp_arg =~ s/\*//g; - $temp_arg =~ s/\s*\bstruct\b\s*/ /g; - if ( ($temp_arg ne "...") - && ($temp_arg !~ /\w+\s+(\w+)(?:\[\d+\])?\s*$/) ) { - die_at_end "$func: $arg ($n) doesn't have a name\n"; - } - my $argname = $1; - - if (defined $argname && (! $has_mflag || $binarycompat)) { - if ($nn||$nz) { - push @asserts, "assert($argname)"; + my $temp_arg = $arg; + $temp_arg =~ s/\*//g; + $temp_arg =~ s/\s*\bstruct\b\s*/ /g; + if ( ($temp_arg ne "...") + && ($temp_arg !~ /\w+\s+(\w+)(?:\[\d+\])?\s*$/) ) { + die_at_end "$func: $arg ($n) doesn't have a name\n"; } - - if ( ! $nocheck - && defined $argtype - && exists $type_asserts{$argtype}) - { - my $type_assert = - $type_asserts{$argtype} =~ s/__arg__/$argname/gr; - $type_assert = "!$argname || $type_assert" if $nullok; - push @asserts, "assert($type_assert)"; + my $argname = $1; + + if (defined $argname && (! $has_mflag || $binarycompat)) { + if ($nn||$nz) { + push @asserts, "assert($argname)"; + } + + if ( ! $nocheck + && defined $argtype + && exists $type_asserts{$argtype}) + { + my $type_assert = + $type_asserts{$argtype} =~ s/__arg__/$argname/gr; + $type_assert = "!$argname || $type_assert" if $nullok; + push @asserts, "assert($type_assert)"; + } } } } @@ -306,6 +326,9 @@ sub generate_proto_h { } $ret .= " comma_pDEPTH" if $has_depth; $ret .= ")"; + + push @asserts, @$assertions if $assertions; + my @attrs; if ( $flags =~ /r/ ) { push @attrs, "__attribute__noreturn__"; diff --git a/universal.c b/universal.c index d60bf9585e11..34e5e6669043 100644 --- a/universal.c +++ b/universal.c @@ -39,20 +39,15 @@ * The main guts of traverse_isa was actually copied from gv_fetchmeth */ -#define PERL_ARGS_ASSERT_ISA_LOOKUP \ - assert(stash); \ - assert(namesv || name) - - STATIC bool S_isa_lookup(pTHX_ HV *stash, SV *namesv, const char * name, STRLEN len, U32 flags) { + PERL_ARGS_ASSERT_ISA_LOOKUP; + const struct mro_meta *const meta = HvMROMETA(stash); HV *isa = meta->isa; const HV *our_stash; - PERL_ARGS_ASSERT_ISA_LOOKUP; - if (!isa) { (void)mro_get_linear_isa(stash); isa = meta->isa; @@ -83,10 +78,6 @@ S_isa_lookup(pTHX_ HV *stash, SV *namesv, const char * name, STRLEN len, U32 fla return FALSE; } -#define PERL_ARGS_ASSERT_SV_DERIVED_FROM_SVPVN \ - assert(sv); \ - assert(namesv || name) - STATIC bool S_sv_derived_from_svpvn(pTHX_ SV *sv, SV *namesv, const char * name, const STRLEN len, U32 flags) {