Skip to content

Commit 429dcba

Browse files
committed
Add valid_identifier_{pve,pvn,sv} API functions
These functions test whether a given string would be considered by the Perl parser to be a valid identifier. Three variants are provided: one taking a string start/end pair, one a string start/length pair, and one looking at the string contained in an SV.
1 parent 63feb21 commit 429dcba

File tree

9 files changed

+184
-1
lines changed

9 files changed

+184
-1
lines changed

MANIFEST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5207,6 +5207,7 @@ ext/XS-APItest/t/utf8_warn07.t Tests for code in utf8.c
52075207
ext/XS-APItest/t/utf8_warn08.t Tests for code in utf8.c
52085208
ext/XS-APItest/t/utf8_warn09.t Tests for code in utf8.c
52095209
ext/XS-APItest/t/utf8_warn_base.pl Tests for code in utf8.c
5210+
ext/XS-APItest/t/valid_identifier.t XS::APItest: tests for valid_identifier_sv()
52105211
ext/XS-APItest/t/weaken.t XS::APItest: tests for sv_rvweaken() and sv_get_backrefs()
52115212
ext/XS-APItest/t/whichsig.t XS::APItest: tests for whichsig() and variants
52125213
ext/XS-APItest/t/win32.t Test Win32 specific APIs

embed.fnc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3766,6 +3766,16 @@ EXdpx |bool |validate_proto |NN SV *name \
37663766
|NULLOK SV *proto \
37673767
|bool warn \
37683768
|bool curstash
3769+
Adp |bool |valid_identifier_pve \
3770+
|NN const char *s \
3771+
|NN const char *end \
3772+
|U32 flags
3773+
Adp |bool |valid_identifier_pvn \
3774+
|NN const char *s \
3775+
|STRLEN len \
3776+
|U32 flags
3777+
Adp |bool |valid_identifier_sv \
3778+
|NULLOK SV *sv
37693779
CRTdip |UV |valid_utf8_to_uvchr \
37703780
|NN const U8 *s \
37713781
|NULLOK STRLEN *retlen

embed.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,9 @@
865865
# define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX,a,b,c)
866866
# define uvchr_to_utf8_flags_msgs(a,b,c,d) Perl_uvchr_to_utf8_flags_msgs(aTHX,a,b,c,d)
867867
# define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d)
868+
# define valid_identifier_pve(a,b,c) Perl_valid_identifier_pve(aTHX_ a,b,c)
869+
# define valid_identifier_pvn(a,b,c) Perl_valid_identifier_pvn(aTHX_ a,b,c)
870+
# define valid_identifier_sv(a) Perl_valid_identifier_sv(aTHX_ a)
868871
# define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr
869872
# define vcmp(a,b) Perl_vcmp(aTHX_ a,b)
870873
# define vcroak(a,b) Perl_vcroak(aTHX_ a,b)

ext/XS-APItest/APItest.pm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use strict;
44
use warnings;
55
use Carp;
66

7-
our $VERSION = '1.39';
7+
our $VERSION = '1.40';
88

99
require XSLoader;
1010

ext/XS-APItest/APItest.xs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7408,6 +7408,12 @@ gimme()
74087408
OUTPUT:
74097409
RETVAL
74107410

7411+
bool
7412+
valid_identifier(SV *s)
7413+
CODE:
7414+
RETVAL = valid_identifier_sv(s);
7415+
OUTPUT:
7416+
RETVAL
74117417

74127418
MODULE = XS::APItest PACKAGE = XS::APItest::Backrefs
74137419

ext/XS-APItest/t/valid_identifier.t

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!perl
2+
3+
use strict;
4+
use warnings;
5+
6+
use open ':std', ':encoding(UTF-8)';
7+
use Test::More;
8+
9+
use_ok('XS::APItest');
10+
11+
# These should all be valid
12+
foreach my $id (qw( abc ab_cd _abc x123 )) {
13+
ok(valid_identifier($id), "'$id' is valid identifier");
14+
}
15+
16+
# These should all not be
17+
foreach my $id (qw( ab-cd 123 abc() ), "ab cd") {
18+
ok(!valid_identifier($id), "'$id' is not valid identifier");
19+
}
20+
21+
# Now for some UTF-8 tests
22+
{
23+
use utf8;
24+
25+
foreach my $id (qw( café sandviĉon )) {
26+
ok(valid_identifier($id), "'$id' is valid UTF-8 identifier");
27+
}
28+
29+
# en-dash
30+
ok(!valid_identifier("ab–cd"), "'ab–cd' is not valid UTF-8 identifier");
31+
}
32+
33+
# objects with "" overloading still work
34+
{
35+
package WithStringify {
36+
use overload '""' => sub { return "an_identifier"; };
37+
sub new { bless [], shift; }
38+
}
39+
40+
ok(valid_identifier(WithStringify->new), 'Object with stringify overload can be valid identifier');
41+
}
42+
43+
done_testing;

pod/perldelta.pod

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,13 @@ well.
350350

351351
XXX
352352

353+
=item *
354+
355+
New API functions L<C<valid_identifier_pve()>|perlapi/valid_identifier_pve>,
356+
L<C<valid_identifier_pvn()>|perlapi/valid_identifier_pvn> and
357+
L<C<valid_identifier_sv()>|perlapi/valid_identifier_sv> have been added, which
358+
test if a string would be considered by Perl to be a valid identifier name.
359+
353360
=back
354361

355362
=head1 Selected Bug Fixes

proto.h

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

toke.c

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13932,6 +13932,105 @@ Perl_parse_subsignature(pTHX_ U32 flags)
1393213932
return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
1393313933
}
1393413934

13935+
/*
13936+
=for apidoc valid_identifier_pve
13937+
13938+
Returns true if the string given by C<s> until C<end> would be considered
13939+
valid as a Perl identifier. That is, it must begin with a character matching
13940+
C<isIDFIRST>, followed by characters all matching C<isIDCONT>. An empty
13941+
string (i.e. when C<end> is C<s>) will return false.
13942+
13943+
If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be
13944+
encoded in UTF-8, and suitable Unicode character test functions will be used.
13945+
13946+
=cut
13947+
*/
13948+
13949+
bool
13950+
Perl_valid_identifier_pve(pTHX_ const char *s, const char *end, U32 flags)
13951+
{
13952+
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVE;
13953+
13954+
if(end <= s)
13955+
return false;
13956+
13957+
if(flags & SVf_UTF8) {
13958+
if(!isIDFIRST_utf8_safe((U8 *)s, (U8 *)end))
13959+
return false;
13960+
13961+
while(s < end) {
13962+
s += UTF8SKIP((U8 *)s);
13963+
if(s == end)
13964+
break;
13965+
if(!isIDCONT_utf8_safe((U8 *)s, (U8 *)end))
13966+
return false;
13967+
}
13968+
return true;
13969+
}
13970+
else {
13971+
if(!isIDFIRST(s[0]))
13972+
return false;
13973+
13974+
while(s < end) {
13975+
s += 1;
13976+
if(s == end)
13977+
break;
13978+
if(!isIDCONT(s[0]))
13979+
return false;
13980+
}
13981+
return true;
13982+
}
13983+
13984+
return false;
13985+
}
13986+
13987+
/*
13988+
=for apidoc valid_identifier_pvn
13989+
13990+
Returns true if the string given by C<s> whose length is C<len> would be
13991+
considered valid as a Perl identifier. That is, it must begin with a
13992+
character matching C<isIDFIRST>, followed by characters all matching
13993+
C<isIDCONT>. An empty string (i.e. when C<len> is zero) will return false.
13994+
13995+
If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be
13996+
encoded in UTF-8, and suitable Unicode character test functions will be used.
13997+
13998+
=cut
13999+
*/
14000+
14001+
bool
14002+
Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags)
14003+
{
14004+
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN;
14005+
14006+
return valid_identifier_pve(s, s + len, flags);
14007+
}
14008+
14009+
/*
14010+
=for apidoc valid_identifier_sv
14011+
14012+
Returns true if the given SV contains a non-empty string whose characters
14013+
match accoding to C<valid_identifier_pvn>. Returns false if given NULL, an
14014+
undefined SV, or a SV that does not contain a non-empty string.
14015+
14016+
Does not invoke C<get> magic on the SV beforehand.
14017+
14018+
=cut
14019+
*/
14020+
14021+
bool
14022+
Perl_valid_identifier_sv(pTHX_ SV *sv)
14023+
{
14024+
PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV;
14025+
14026+
if(!sv || !SvOK(sv))
14027+
return false;
14028+
14029+
STRLEN len;
14030+
const char *pv = SvPV_const(sv, len);
14031+
return valid_identifier_pve(pv, pv + len, SvUTF8(sv));
14032+
}
14033+
1393514034
/*
1393614035
* ex: set ts=8 sts=4 sw=4 et:
1393714036
*/

0 commit comments

Comments
 (0)