@@ -243,6 +243,9 @@ protected function save($content, $path)
243243 /**
244244 * Register a pattern to execute against the source content.
245245 *
246+ * If $replacement is a string, it must be plain text. Placeholders like $1 or \2 don't work.
247+ * If you need that functionality, use a callback instead.
248+ *
246249 * @param string $pattern PCRE pattern
247250 * @param string|callable $replacement Replacement value for matched pattern
248251 */
@@ -268,11 +271,13 @@ protected function registerPattern($pattern, $replacement = '')
268271 */
269272 protected function replace ($ content )
270273 {
271- $ processed = '' ;
274+ $ contentLength = strlen ($ content );
275+ $ output = '' ;
276+ $ processedOffset = 0 ;
272277 $ positions = array_fill (0 , count ($ this ->patterns ), -1 );
273278 $ matches = array ();
274279
275- while ($ content ) {
280+ while ($ processedOffset < $ contentLength ) {
276281 // find first match for all patterns
277282 foreach ($ this ->patterns as $ i => $ pattern ) {
278283 list ($ pattern , $ replacement ) = $ pattern ;
@@ -285,12 +290,12 @@ protected function replace($content)
285290
286291 // no need to re-run matches that are still in the part of the
287292 // content that hasn't been processed
288- if ($ positions [$ i ] >= 0 ) {
293+ if ($ positions [$ i ] >= $ processedOffset ) {
289294 continue ;
290295 }
291296
292297 $ match = null ;
293- if (preg_match ($ pattern , $ content , $ match , PREG_OFFSET_CAPTURE )) {
298+ if (preg_match ($ pattern , $ content , $ match , PREG_OFFSET_CAPTURE , $ processedOffset )) {
294299 $ matches [$ i ] = $ match ;
295300
296301 // we'll store the match position as well; that way, we
@@ -307,61 +312,52 @@ protected function replace($content)
307312
308313 // no more matches to find: everything's been processed, break out
309314 if (!$ matches ) {
310- $ processed .= $ content ;
315+ // output the remaining content
316+ $ output .= substr ($ content , $ processedOffset );
311317 break ;
312318 }
313319
314320 // see which of the patterns actually found the first thing (we'll
315321 // only want to execute that one, since we're unsure if what the
316322 // other found was not inside what the first found)
317- $ discardLength = min ($ positions );
318- $ firstPattern = array_search ($ discardLength , $ positions );
319- $ match = $ matches [$ firstPattern ][ 0 ][ 0 ] ;
323+ $ matchOffset = min ($ positions );
324+ $ firstPattern = array_search ($ matchOffset , $ positions );
325+ $ match = $ matches [$ firstPattern ];
320326
321327 // execute the pattern that matches earliest in the content string
322- list ($ pattern , $ replacement ) = $ this ->patterns [$ firstPattern ];
323- $ replacement = $ this ->replacePattern ($ pattern , $ replacement , $ content );
324-
325- // figure out which part of the string was unmatched; that's the
326- // part we'll execute the patterns on again next
327- $ content = (string ) substr ($ content , $ discardLength );
328- $ unmatched = (string ) substr ($ content , strpos ($ content , $ match ) + strlen ($ match ));
329-
330- // move the replaced part to $processed and prepare $content to
331- // again match batch of patterns against
332- $ processed .= substr ($ replacement , 0 , strlen ($ replacement ) - strlen ($ unmatched ));
333- $ content = $ unmatched ;
334-
335- // first match has been replaced & that content is to be left alone,
336- // the next matches will start after this replacement, so we should
337- // fix their offsets
338- foreach ($ positions as $ i => $ position ) {
339- $ positions [$ i ] -= $ discardLength + strlen ($ match );
340- }
328+ list (, $ replacement ) = $ this ->patterns [$ firstPattern ];
329+
330+ // add the part of the input between $processedOffset and the first match;
331+ // that content wasn't matched by anything
332+ $ output .= substr ($ content , $ processedOffset , $ matchOffset - $ processedOffset );
333+ // add the replacement for the match
334+ $ output .= $ this ->executeReplacement ($ replacement , $ match );
335+ // advance $processedOffset past the match
336+ $ processedOffset = $ matchOffset + strlen ($ match [0 ][0 ]);
341337 }
342338
343- return $ processed ;
339+ return $ output ;
344340 }
345341
/**
 * Produce the replacement text for a single match.
 *
 * If $replacement is not callable it is returned unchanged: it is treated
 * as plain text, so placeholders such as $1 or \2 are NOT expanded.
 * If it is callable, it is invoked with the match data and its return
 * value is used as the replacement.
 *
 * NOTE(review): is_callable() also accepts strings that name an existing
 * function (e.g. 'trim'), so such a string is treated as a callback rather
 * than as literal replacement text - confirm callers never rely on that.
 *
 * @param string|callable $replacement Replacement value
 * @param array           $match       Match data, in PREG_OFFSET_CAPTURE form
 *                                     (every entry is array(text, offset))
 *
 * @return string
 */
protected function executeReplacement($replacement, $match)
{
    if (!is_callable($replacement)) {
        return $replacement;
    }

    // Convert $match from the PREG_OFFSET_CAPTURE form (array(text, offset)
    // per group) to the plain form a preg_replace_callback-style callback
    // expects. array_map() with a single input array preserves keys, so
    // named capture groups stay addressable by name, and no by-reference
    // loop variable is left dangling.
    $plainMatch = array_map(
        function ($matchItem) {
            return $matchItem[0];
        },
        $match
    );

    // call_user_func() accepts every callable form (closure, array callable,
    // 'function' and 'Class::method' strings) on all PHP versions, whereas
    // the $replacement(...) call syntax rejects 'Class::method' before PHP 7.
    return call_user_func($replacement, $plainMatch);
}
366362
367363 /**
0 commit comments