@@ -474,11 +474,16 @@ public static function split(
474474 string $ pattern ,
475475 bool |int $ captureOffset = false ,
476476 bool $ skipEmpty = false ,
477+ bool $ utf8Offset = false ,
477478 ): array {
478479 $ flags = is_int ($ captureOffset ) // back compatibility
479480 ? $ captureOffset
480481 : ($ captureOffset ? PREG_SPLIT_OFFSET_CAPTURE : 0 ) | ($ skipEmpty ? PREG_SPLIT_NO_EMPTY : 0 );
481- return self ::pcre ('preg_split ' , [$ pattern , $ subject , -1 , $ flags | PREG_SPLIT_DELIM_CAPTURE ]);
482+ $ m = self ::pcre ('preg_split ' , [$ pattern , $ subject , -1 , $ flags | PREG_SPLIT_DELIM_CAPTURE ]);
483+ if ($ utf8Offset && ($ flags & PREG_SPLIT_OFFSET_CAPTURE )) {
484+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
485+ }
486+ return $ m ;
482487 }
483488
484489
@@ -491,16 +496,24 @@ public static function match(
491496 bool |int $ captureOffset = false ,
492497 int $ offset = 0 ,
493498 bool $ unmatchedAsNull = false ,
499+ bool $ utf8Offset = false ,
494500 ): ?array {
495501 $ flags = is_int ($ captureOffset ) // back compatibility
496502 ? $ captureOffset
497503 : ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
504+ if ($ utf8Offset ) {
505+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
506+ }
498507 if ($ offset > strlen ($ subject )) {
499508 return null ;
500509 }
501- return self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])
502- ? $ m
503- : null ;
510+ if (!self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])) {
511+ return null ;
512+ }
513+ if ($ utf8Offset && ($ flags & PREG_OFFSET_CAPTURE )) {
514+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
515+ }
516+ return $ m ;
504517 }
505518
506519
@@ -515,10 +528,14 @@ public static function matchAll(
515528 int $ offset = 0 ,
516529 bool $ unmatchedAsNull = false ,
517530 bool $ patternOrder = false ,
531+ bool $ utf8Offset = false ,
518532 ): array {
519533 $ flags = is_int ($ captureOffset ) // back compatibility
520534 ? $ captureOffset
521535 : ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 ) | ($ patternOrder ? PREG_PATTERN_ORDER : 0 );
536+ if ($ utf8Offset ) {
537+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
538+ }
522539 if ($ offset > strlen ($ subject )) {
523540 return [];
524541 }
@@ -527,6 +544,9 @@ public static function matchAll(
527544 ($ flags & PREG_PATTERN_ORDER ) ? $ flags : ($ flags | PREG_SET_ORDER ),
528545 $ offset ,
529546 ]);
547+ if ($ utf8Offset && ($ flags & PREG_OFFSET_CAPTURE )) {
548+ return self ::bytesToChars ($ subject , $ m );
549+ }
530550 return $ m ;
531551 }
532552
@@ -541,12 +561,16 @@ public static function replace(
541561 int $ limit = -1 ,
542562 bool $ captureOffset = false ,
543563 bool $ unmatchedAsNull = false ,
564+ bool $ utf8Offset = false ,
544565 ): string {
545566 if (is_object ($ replacement ) || is_array ($ replacement )) {
546567 if (!is_callable ($ replacement , false , $ textual )) {
547568 throw new Nette \InvalidStateException ("Callback ' $ textual' is not callable. " );
548569 }
549570 $ flags = ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
571+ if ($ utf8Offset && $ captureOffset ) {
572+ $ replacement = fn ($ m ) => $ replacement (self ::bytesToChars ($ subject , [$ m ])[0 ]);
573+ }
550574 return self ::pcre ('preg_replace_callback ' , [$ pattern , $ replacement , $ subject , $ limit , 0 , $ flags ]);
551575
552576 } elseif (is_array ($ pattern ) && is_string (key ($ pattern ))) {
@@ -558,6 +582,22 @@ public static function replace(
558582 }
559583
560584
/**
 * Rewrites the byte offsets in offset-capturing PCRE results as character offsets.
 *
 * Each item of $groups is a list of [text, byteOffset] pairs, as produced by the
 * preg_* functions with the offset-capture flag. Character counts are obtained from
 * self::length() applied to the relevant slice of $s (presumably UTF-8 code points;
 * this depends on that helper).
 *
 * Offsets may come in any order: nested capture groups and PREG_PATTERN_ORDER results
 * can step backwards, so the running position is moved in both directions rather than
 * assuming a monotonic sequence. Only the distance from the previously converted
 * offset is measured each time, so the subject is not rescanned from the start.
 *
 * @param  string  $s       subject string the offsets refer to
 * @param  array   $groups  list of match sets, each a list of [text, byteOffset] pairs
 * @return array   same structure with every non-negative offset expressed in characters
 */
private static function bytesToChars(string $s, array $groups): array
{
    $lastBytes = $lastChars = 0;
    foreach ($groups as &$matches) {
        foreach ($matches as &$match) {
            $bytes = $match[1];
            if ($bytes < 0) {
                // PCRE reports -1 for a sub-pattern that did not participate; keep it as is
                continue;
            }

            if ($bytes > $lastBytes) {
                // moving forward: add the characters between the previous and the new position
                $lastChars += self::length(substr($s, $lastBytes, $bytes - $lastBytes));
            } elseif ($bytes < $lastBytes) {
                // moving backward: subtract the characters between the new and the previous position
                $lastChars -= self::length(substr($s, $bytes, $lastBytes - $bytes));
            }

            $lastBytes = $bytes;
            $match[1] = $lastChars;
        }
        unset($match); // drop the dangling reference before the next iteration reuses the name
    }
    unset($matches);

    return $groups;
}
599+
600+
561601 /** @internal */
562602 public static function pcre (string $ func , array $ args )
563603 {
0 commit comments