@@ -17,20 +17,19 @@ class DotNotationParser {
1717 * @return string[]
1818 */
1919 public function parse ( string $ path ) : array {
20- $ out = [];
21- $ chars = preg_split ('/(?<!^)(?!$)/u ' , $ path , -1 , PREG_SPLIT_NO_EMPTY ) ?: [];
20+ $ out = [];
2221
23- for (;;) {
24- $ token = current ( $ chars );
25- if ( $ token === false ) {
26- break ;
27- }
22+ $ chars = $ this -> graphemeIter ( $ path );
23+
24+ while ( $ chars -> valid () ) {
25+ $ token = $ chars -> current () ;
26+ $ key = $ chars -> key ();
2827
2928 switch ( $ token ) {
3029 case '. ' :
3130 throw new ParseException (
32- sprintf ('failed to parse path, expected string, got "%s" at %d ' , $ token , key ( $ chars ) ),
33- key ( $ chars ) ,
31+ sprintf ('failed to parse path, expected string, got "%s" at %d ' , $ token , $ key ),
32+ $ key ,
3433 ParseException::CODE_UNEXPECTED_CHARACTER
3534 );
3635 case '" ' :
@@ -40,64 +39,89 @@ public function parse( string $path ) : array {
4039 $ out [] = $ this ->scanString ($ chars );
4140 break ;
4241 }
42+
43+ $ chars ->next ();
4344 }
4445
4546 return $ out ;
4647 }
4748
/**
 * Yields each grapheme cluster (user-visible "character") of $s, in order.
 *
 * Keys are the zero-based position of the grapheme within the sequence, so
 * a caller that reports `$chars->key()` in an error message points at the
 * offending token itself rather than at the byte offset just past it.
 *
 * Iteration ends at the end of the string, or early if preg_match() does
 * not match (it returns false for malformed UTF-8 under the /u modifier).
 *
 * @param string $s UTF-8 encoded input
 * @return \Generator<int, string>
 */
private function graphemeIter( string $s ) : \Generator {
	$off = 0;
	$len = strlen($s);

	while( $off < $len && preg_match('/\X/u', $s, $m, 0, $off) ) {
		$g    = $m[0];      // one extended grapheme cluster
		$off += strlen($g); // byte length, so the next match starts right after it

		// No explicit key: PHP numbers yielded values 0, 1, 2, ... which is
		// the token position the parser's error messages expect.
		yield $g;
	}
}
65+
/**
 * Reads an unquoted path segment from the current position of $chars.
 *
 * Tokens are appended until a "." separator or the end of input is reached.
 * A terminating "." is consumed, leaving the generator positioned on the
 * token that follows it.
 *
 * @param \Generator<string> $chars Generator of Unicode characters, advanced in place
 * @return string the collected segment, without the trailing separator
 */
private function scanString( \Generator $chars ) : string {
	$segment = '';

	for( ; $chars->valid(); $chars->next() ) {
		$grapheme = $chars->current();

		if( $grapheme === '.' ) {
			// Step over the separator; `break` skips the loop's own next().
			$chars->next();

			break;
		}

		$segment .= $grapheme;
	}

	return $segment;
}
6786
6887 /**
69- * @param string[] $chars array of unicode characters by reference
88+ * @param \Generator<string> $chars Generator of Unicode characters, advanced in place
7089 */
71- private function scanQuotedString ( array & $ chars ) : string {
90+ private function scanQuotedString ( \ Generator $ chars ) : string {
7291 $ buff = '' ;
7392
74- next ($ chars );
75- for (;;) {
76- $ token = current ($ chars );
93+ $ chars ->next ();
94+ while ( $ chars ->valid () ) {
95+ $ token = $ chars ->current ();
96+ $ key = $ chars ->key ();
97+
7798 if ( $ token === false ) {
7899 throw new ParseException (
79100 'failed to parse path, expected ", got EOF ' ,
80- key ( $ chars ) ?: count ($ chars ),
101+ $ key ?: count ($ chars ),
81102 ParseException::CODE_UNEXPECTED_EOF
82103 );
83104 }
84105
85106 if ( $ token === '" ' ) {
86- $ next = next ($ chars );
87- if ( $ next === false || $ next === '. ' ) {
88- next ($ chars );
107+ $ chars ->next ();
108+ $ next = $ chars ->current ();
109+
110+ if ( !$ chars ->valid () || $ next === '. ' ) {
111+ $ chars ->next ();
89112 break ;
90113 }
91114
92115 throw new ParseException (
93- sprintf ('failed to parse path, expected . or EOF, got "%s" at %d ' , $ next , key ( $ chars ) ),
94- key ( $ chars ) ,
116+ sprintf ('failed to parse path, expected . or EOF, got "%s" at %d ' , $ next , $ key ),
117+ $ key ,
95118 ParseException::CODE_UNEXPECTED_CHARACTER
96119 );
97120 }
98121
99122 $ buff .= $ token ;
100- next ($ chars );
123+
124+ $ chars ->next ();
101125 }
102126
103127 return $ buff ;
0 commit comments