diff --git a/MANIFEST b/MANIFEST index 401e6c2fa8dc..72b20c026c4d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -5190,6 +5190,7 @@ ext/XS-APItest/t/sv_numeq.t Test sv_numeq ext/XS-APItest/t/sv_streq.t Test sv_streq ext/XS-APItest/t/svcat.t Test sv_catpvn ext/XS-APItest/t/svcatpvf.t Test sv_catpvf argument reordering +ext/XS-APItest/t/svcow.t Test COW ext/XS-APItest/t/sviscow.t Test SvIsCOW ext/XS-APItest/t/svpeek.t XS::APItest extension ext/XS-APItest/t/svpv.t More generic SvPVbyte and SvPVutf8 tests diff --git a/embed.fnc b/embed.fnc index 0332f63875ec..2a9a61b445fd 100644 --- a/embed.fnc +++ b/embed.fnc @@ -4144,7 +4144,7 @@ RTp |MEM_SIZE|malloc_good_size \ #endif #if defined(PERL_ANY_COW) : Used in regexec.c -EXpx |SV * |sv_setsv_cow |NULLOK SV *dsv \ +EXdpx |bool |sv_setsv_cow |NN SV **pdsv \ |NN SV *ssv #endif #if defined(PERL_CORE) diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 69561db6a106..469f7646fcec 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -3158,6 +3158,17 @@ sv_setsv_cow_hashkey_core() bool sv_setsv_cow_hashkey_notcore() +void +sv_grow(SV *sv, UV len) + CODE: + sv_force_normal(sv); + SvGROW(sv, len); + +void +sv_force_normal(SV *sv) + CODE: + sv_force_normal(sv); + void sv_set_deref(SV *sv, SV *sv2, int which) CODE: diff --git a/ext/XS-APItest/t/svcow.t b/ext/XS-APItest/t/svcow.t new file mode 100644 index 000000000000..5bd927424613 --- /dev/null +++ b/ext/XS-APItest/t/svcow.t @@ -0,0 +1,46 @@ +#!perl +use strict; +use warnings; +use XS::APItest; +use B; + +use Test::More tests => 11; + +{ + # github #21877 + # the regexp engine would COW an SV that had a large + # SvLEN() in cases where sv_setsv() wouldn't. + # This led to some surprises. + # - On cywgin this produced some strange performance problems + # - In general it meant the (large) buffer of the SV remained + # allocated for longer than it otherwise would. + # Also, since the SV became CoW, further copies would also + # be CoW, for example, code like: + # + # while (<>) { # sv_getsv() currently allocates a large-ish buffer + # /regex that (captures)/; # CoW large buffer + # push @save, $_; # copy in @save still has that large buffer + # } + my $x = "Something\n" x 1000; + cmp_ok(length $x, '>=', 1250, + "need to be at least 1250 to be COWed"); + sv_grow($x, 1_000_000); + my $ref = B::svref_2object(\$x); + cmp_ok($ref->LEN, '>=', 1_000_000, + "check we got it longer"); + ok(!SvIsCOW($x), "not cow before"); + is($ref->REFCNT, 1, "expected reference count"); + ok($x =~ /me(.)hing/, "match"); + ok(!SvIsCOW($x), "not cow after"); + + # make sure reasonable SVs are COWed + my $y = "Something\n" x 1000; + sv_force_normal($y); + cmp_ok(length $y, '>=', 1250, + "need to be at least 1250 to be COWed"); + my $ref2 = B::svref_2object(\$y); + ok(!SvIsCOW($y), "not cow before"); + is($ref2->REFCNT, 1, "expected reference count"); + ok($y =~ /me(.)hing/, "match"); + ok(SvIsCOW($y), "is cow after"); +} diff --git a/proto.h b/proto.h index e132956f8ac8..6eaa2106a054 100644 --- a/proto.h +++ b/proto.h @@ -6047,10 +6047,10 @@ Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) #endif /* !defined(NO_MATHOMS) */ #if defined(PERL_ANY_COW) -PERL_CALLCONV SV * -Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv); +PERL_CALLCONV bool +Perl_sv_setsv_cow(pTHX_ SV **pdsv, SV *ssv); # define PERL_ARGS_ASSERT_SV_SETSV_COW \ - assert(ssv) + assert(pdsv); assert(ssv) #endif #if defined(PERL_CORE) diff --git a/regexec.c b/regexec.c index 7c1e246537c0..f10762a4160f 100644 --- a/regexec.c +++ b/regexec.c @@ -3559,7 +3559,9 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx, else { /* create new COW SV to share string */ RXp_MATCH_COPY_FREE(prog); - RXp_SAVED_COPY(prog) = sv_setsv_cow(RXp_SAVED_COPY(prog), sv); + /* sv_setsv_cow() might not COW for some reason */ + if (!sv_setsv_cow(&RXp_SAVED_COPY(prog), sv)) + goto didnt_cow; } RXp_SUBBEG(prog) = (char *)SvPVX_const(RXp_SAVED_COPY(prog)); assert (SvPOKp(RXp_SAVED_COPY(prog))); @@ -3569,6 +3571,9 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx, } else #endif { +#ifdef PERL_ANY_COW + didnt_cow: ; +#endif SSize_t min = 0; SSize_t max = strend - strbeg; SSize_t sublen; diff --git a/sv.c b/sv.c index e83fd763591d..17f3a03bdb99 100644 --- a/sv.c +++ b/sv.c @@ -4902,10 +4902,33 @@ Perl_sv_setsv_mg(pTHX_ SV *const dsv, SV *const ssv) SvSETMAGIC(dsv); } +/* +=for apidoc sv_setsv_cow + +Attempt to make a COW copy of C in C<*pdsv>. + +C<*pdsv> must be NULL or a valid SV, if NULL it will be filled in with +a valid SV on success. + +C must be a POK, pPOK SV. + +Returns true if the copy succeeds, false if a CoW copy cannot be made +for some reason. + +sv_setsv_cow() is used by the regular expression engine to attempt to +make a COW copy of the matched against string for use in reporting +C<$1> etc. + +If this fails the regular expression engine instead makes a non-SV +copy of a subset of the matched against string. + +=cut +*/ + #ifdef PERL_ANY_COW # define SVt_COW SVt_PV -SV * -Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv) +bool +Perl_sv_setsv_cow(pTHX_ SV **pdsv, SV *ssv) { STRLEN cur = SvCUR(ssv); STRLEN len = SvLEN(ssv); @@ -4916,6 +4939,8 @@ Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv) #endif PERL_ARGS_ASSERT_SV_SETSV_COW; + + SV *dsv = *pdsv; #ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n", @@ -4925,6 +4950,13 @@ Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv) sv_dump(dsv); } #endif + if (!SvIsCOW(ssv) && + (!CHECK_COWBUF_THRESHOLD(cur, len) + || ! CHECK_COW_THRESHOLD(cur, len))) { + DEBUG_C(PerlIO_printf(Perl_debug_log, + "Fast copy on write: Sizes %zu/%zu not appropriate to COW\n", cur, len)); + return FALSE; + } if (dsv) { if (SvTHINKFIRST(dsv)) sv_force_normal_flags(dsv, SV_COW_DROP_PV); @@ -4970,6 +5002,7 @@ Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv) sv_buf_to_ro(ssv); common_exit: + *pdsv = dsv; SvPV_set(dsv, new_pv); SvFLAGS(dsv) = new_flags; if (SvUTF8(ssv)) @@ -4980,7 +5013,7 @@ Perl_sv_setsv_cow(pTHX_ SV *dsv, SV *ssv) if (DEBUG_C_TEST) sv_dump(dsv); #endif - return dsv; + return TRUE; } #endif