From 8993af9d1f44c04b8feaea56f46fbb33703e67a5 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Wed, 3 Dec 2025 11:42:56 +0000 Subject: [PATCH] make pregexec() handle zero-length strings again GH #23903 In embed.fnc, commit v5.43.3-167-g45ea12db26 added SPTR, EPTR parameter modifiers to (amongst other API functions), Perl_pregexec(). These cause assert constraints to be added to the effect that SPTR < EPTR (since the latter is supposed to be a pointer to the byte after the last character in the string). This falls down for an empty string since in this case pregexec() is called with strbeg == strend. This was causing an assert failure in the test suite for Package-Stash-XS. The reason it wasn't noticed before is because: 1) pregexec() is a thin wrapper over regexec_flags(); 2) The perl core (e.g. pp_match()) calls regexec_flags() rather than pregexec(); 3) Package::Stash::XS has XS code which calls pregexec() directly rather than using CALLREGEXEC() (which would call regexec_flags()); 4) In embed.fnc, regexec_flags()'s strend parameter is declared as NN rather than EPTR, so it doesn't get the assert added. So very little code was actually using pregexec(). This commit, for now, changes pregexec()'s strend parameter from EPTR to EPTRQ, which has the net effect of allowing zero-length strings to be passed, and thus fixes the CPAN issue. But longer term, we need to decide: is the general logic for EPTR wrong? Should the assert be SPTR <= EPTR? And should EPTR be applied to regexec_flags()'s strend parameter too? --- embed.fnc | 2 +- ext/XS-APItest/APItest.pm | 2 +- ext/XS-APItest/APItest.xs | 23 +++++++++++++++++++++++ ext/XS-APItest/t/callregexec.t | 7 +++++-- proto.h | 2 +- 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/embed.fnc b/embed.fnc index 7e6ea1afe56d..5f67c98edd7f 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2713,7 +2713,7 @@ Adhp |REGEXP *|pregcomp |NN SV * const pattern \ |const U32 flags Adhp |I32 |pregexec |NN REGEXP * const prog \ |MPTR char *stringarg \ - |EPTR char *strend \ + |EPTRQ char *strend \ |SPTR char *strbeg \ |SSize_t minend \ |NN SV *screamer \ diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index ae76f0519e12..33c23f50f2c7 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -4,7 +4,7 @@ use strict; use warnings; use Carp; -our $VERSION = '1.47'; +our $VERSION = '1.48'; require XSLoader; diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index dbd03f8314d8..aa68c1fc689f 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -4780,6 +4780,29 @@ CODE: OUTPUT: RETVAL + # provide access to pregexec, except replace pointers within the + # string with offsets from the start of the string + +I32 +callpregexec(SV *prog, STRLEN stringarg, STRLEN strend, I32 minend, SV *sv, U32 nosave) +CODE: + { + STRLEN len; + char *strbeg; + if (SvROK(prog)) + prog = SvRV(prog); + strbeg = SvPV_force(sv, len); + RETVAL = pregexec((REGEXP *)prog, + strbeg + stringarg, + strbeg + strend, + strbeg, + minend, + sv, + nosave); + } +OUTPUT: + RETVAL + void lexical_import(SV *name, CV *cv) CODE: diff --git a/ext/XS-APItest/t/callregexec.t b/ext/XS-APItest/t/callregexec.t index 22446b66f56b..fc8ef8d21add 100644 --- a/ext/XS-APItest/t/callregexec.t +++ b/ext/XS-APItest/t/callregexec.t @@ -1,6 +1,6 @@ #!perl -# test CALLREGEXEC() +# test CALLREGEXEC() and pregexec() # (currently it just checks that it handles non-\0 terminated strings; # full tests haven't been added yet) @@ -10,7 +10,7 @@ use strict; use XS::APItest; *callregexec = *XS::APItest::callregexec; -use Test::More tests => 48; +use Test::More tests => 75; # Test that the regex engine can handle strings without terminating \0 # XXX This is by no means comprehensive; it doesn't test all ops, nor all @@ -34,6 +34,8 @@ sub try { my $bytes = do { use bytes; length $str1 }; ok !!$exp == !!callregexec($re, 0, $bytes, 0, $str, 0), "$desc callregexec"; + ok !!$exp == !!callpregexec($re, 0, $bytes, 0, $str, 0), + "$desc callpregexec"; } @@ -62,4 +64,5 @@ sub try { try "ab\t", qr/^.+\h/, 0, 'HORIZWS'; try "abx", qr/^.+\H/, 1, 'NHORIZWS'; try "abx", qr/a.*x/, 0, 'CURLY'; + try "", qr/x?/, 1, 'empty'; } diff --git a/proto.h b/proto.h index acd96ff396ad..14297a40f654 100644 --- a/proto.h +++ b/proto.h @@ -3607,7 +3607,7 @@ Perl_pregexec(pTHX_ REGEXP * const prog, char *stringarg, char *strend, char *st #define PERL_ARGS_ASSERT_PREGEXEC \ assert(prog); assert(stringarg); assert(strend); assert(strbeg); \ assert(screamer); assert(strbeg <= stringarg); \ - assert(stringarg < strend) + assert(stringarg <= strend) PERL_CALLCONV void Perl_pregfree(pTHX_ REGEXP *r);