Skip to content

Commit 9efc354

Browse files
committed
WIP
1 parent ada7646 commit 9efc354

File tree

1 file changed

+52
-28
lines changed

1 file changed

+52
-28
lines changed

src/DotNotationParser.php

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,19 @@ class DotNotationParser {
1717
* @return string[]
1818
*/
1919
public function parse( string $path ) : array {
20-
$out = [];
21-
$chars = preg_split('/(?<!^)(?!$)/u', $path, -1, PREG_SPLIT_NO_EMPTY) ?: [];
20+
$out = [];
2221

23-
for(;;) {
24-
$token = current($chars);
25-
if( $token === false ) {
26-
break;
27-
}
22+
$chars = $this->graphemeIter($path);
23+
24+
while( $chars->valid() ) {
25+
$token = $chars->current();
26+
$key = $chars->key();
2827

2928
switch( $token ) {
3029
case '.':
3130
throw new ParseException(
32-
sprintf('failed to parse path, expected string, got "%s" at %d', $token, key($chars)),
33-
key($chars),
31+
sprintf('failed to parse path, expected string, got "%s" at %d', $token, $key),
32+
$key,
3433
ParseException::CODE_UNEXPECTED_CHARACTER
3534
);
3635
case '"':
@@ -40,64 +39,89 @@ public function parse( string $path ) : array {
4039
$out[] = $this->scanString($chars);
4140
break;
4241
}
42+
43+
$chars->next();
4344
}
4445

4546
return $out;
4647
}
4748

4849
/**
49-
* @param string[] $chars array of unicode characters by reference
50+
* Yields each grapheme (user‑visible “character”) from $s.
51+
*
52+
* @return \Generator<string>
5053
*/
51-
private function scanString( array &$chars ) : string {
54+
private function graphemeIter( string $s ) : \Generator {
55+
$off = 0;
56+
$len = strlen($s);
57+
58+
while( $off < $len && preg_match('/\X/u', $s, $m, 0, $off) ) {
59+
$g = $m[0]; // one grapheme cluster, UTF‑8 safe
60+
$off += strlen($g); // advance by its byte length
61+
62+
yield $off => $g;
63+
}
64+
}
65+
66+
/**
67+
* @param \Generator<string> $chars Generator of Unicode characters
68+
*/
69+
private function scanString( \Generator $chars ) : string {
5270
$buff = '';
53-
for(;;) {
54-
$token = current($chars);
55-
if( $token === false || $token === '.' ) {
56-
next($chars);
71+
while( $chars->valid() ) {
72+
$token = $chars->current();
73+
74+
if( !$chars->valid() || $token === '.' ) {
75+
$chars->next();
5776

5877
break;
5978
}
6079

6180
$buff .= $token;
62-
next($chars);
81+
$chars->next();
6382
}
6483

6584
return $buff;
6685
}
6786

6887
/**
69-
* @param string[] $chars array of unicode characters by reference
88+
* @param \Generator<string> $chars Generator of Unicode characters; advanced in place by this method
7089
*/
71-
private function scanQuotedString( array &$chars ) : string {
90+
private function scanQuotedString( \Generator $chars ) : string {
7291
$buff = '';
7392

74-
next($chars);
75-
for(;;) {
76-
$token = current($chars);
93+
$chars->next();
94+
while( $chars->valid() ) {
95+
$token = $chars->current();
96+
$key = $chars->key();
97+
7798
if( $token === false ) {
7899
throw new ParseException(
79100
'failed to parse path, expected ", got EOF',
80-
key($chars) ?: count($chars),
101+
$key ?: count($chars),
81102
ParseException::CODE_UNEXPECTED_EOF
82103
);
83104
}
84105

85106
if( $token === '"' ) {
86-
$next = next($chars);
87-
if( $next === false || $next === '.' ) {
88-
next($chars);
107+
$chars->next();
108+
$next = $chars->current();
109+
110+
if( !$chars->valid() || $next === '.' ) {
111+
$chars->next();
89112
break;
90113
}
91114

92115
throw new ParseException(
93-
sprintf('failed to parse path, expected . or EOF, got "%s" at %d', $next, key($chars)),
94-
key($chars),
116+
sprintf('failed to parse path, expected . or EOF, got "%s" at %d', $next, $key),
117+
$key,
95118
ParseException::CODE_UNEXPECTED_CHARACTER
96119
);
97120
}
98121

99122
$buff .= $token;
100-
next($chars);
123+
124+
$chars->next();
101125
}
102126

103127
return $buff;

0 commit comments

Comments
 (0)