Skip to content

Commit e3a0356

Browse files
catrope authored and matthiasmullie committed
Use offsets, reduce string copying to improve performance
Performance improvements in Minify::replace():

- Use offsets when regex matching, instead of substr()ing off the processed part of the string
- Perform replacements manually, rather than calling preg_replace() / preg_replace_callback() on the entire input string

This dramatically improves performance, especially for inputs with many things that are replaced at this stage (many strings, comments or regexes).

The only change noticed by other code is that placeholders like $1 or \2 can no longer be used in Minify::registerPattern(), but no callers were using this feature anyway, and future callers can use a callback instead.
1 parent 7f7aadd commit e3a0356

File tree

1 file changed

+35
-39
lines changed

1 file changed

+35
-39
lines changed

src/Minify.php

Lines changed: 35 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ protected function save($content, $path)
243243
/**
244244
* Register a pattern to execute against the source content.
245245
*
246+
* If $replacement is a string, it must be plain text. Placeholders like $1 or \2 don't work.
247+
* If you need that functionality, use a callback instead.
248+
*
246249
* @param string $pattern PCRE pattern
247250
* @param string|callable $replacement Replacement value for matched pattern
248251
*/
@@ -268,11 +271,13 @@ protected function registerPattern($pattern, $replacement = '')
268271
*/
269272
protected function replace($content)
270273
{
271-
$processed = '';
274+
$contentLength = strlen($content);
275+
$output = '';
276+
$processedOffset = 0;
272277
$positions = array_fill(0, count($this->patterns), -1);
273278
$matches = array();
274279

275-
while ($content) {
280+
while ($processedOffset < $contentLength) {
276281
// find first match for all patterns
277282
foreach ($this->patterns as $i => $pattern) {
278283
list($pattern, $replacement) = $pattern;
@@ -285,12 +290,12 @@ protected function replace($content)
285290

286291
// no need to re-run matches that are still in the part of the
287292
// content that hasn't been processed
288-
if ($positions[$i] >= 0) {
293+
if ($positions[$i] >= $processedOffset) {
289294
continue;
290295
}
291296

292297
$match = null;
293-
if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE)) {
298+
if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE, $processedOffset)) {
294299
$matches[$i] = $match;
295300

296301
// we'll store the match position as well; that way, we
@@ -307,61 +312,52 @@ protected function replace($content)
307312

308313
// no more matches to find: everything's been processed, break out
309314
if (!$matches) {
310-
$processed .= $content;
315+
// output the remaining content
316+
$output .= substr($content, $processedOffset);
311317
break;
312318
}
313319

314320
// see which of the patterns actually found the first thing (we'll
315321
// only want to execute that one, since we're unsure if what the
316322
// other found was not inside what the first found)
317-
$discardLength = min($positions);
318-
$firstPattern = array_search($discardLength, $positions);
319-
$match = $matches[$firstPattern][0][0];
323+
$matchOffset = min($positions);
324+
$firstPattern = array_search($matchOffset, $positions);
325+
$match = $matches[$firstPattern];
320326

321327
// execute the pattern that matches earliest in the content string
322-
list($pattern, $replacement) = $this->patterns[$firstPattern];
323-
$replacement = $this->replacePattern($pattern, $replacement, $content);
324-
325-
// figure out which part of the string was unmatched; that's the
326-
// part we'll execute the patterns on again next
327-
$content = (string) substr($content, $discardLength);
328-
$unmatched = (string) substr($content, strpos($content, $match) + strlen($match));
329-
330-
// move the replaced part to $processed and prepare $content to
331-
// again match batch of patterns against
332-
$processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
333-
$content = $unmatched;
334-
335-
// first match has been replaced & that content is to be left alone,
336-
// the next matches will start after this replacement, so we should
337-
// fix their offsets
338-
foreach ($positions as $i => $position) {
339-
$positions[$i] -= $discardLength + strlen($match);
340-
}
328+
list(, $replacement) = $this->patterns[$firstPattern];
329+
330+
// add the part of the input between $processedOffset and the first match;
331+
// that content wasn't matched by anything
332+
$output .= substr($content, $processedOffset, $matchOffset - $processedOffset);
333+
// add the replacement for the match
334+
$output .= $this->executeReplacement($replacement, $match);
335+
// advance $processedOffset past the match
336+
$processedOffset = $matchOffset + strlen($match[0][0]);
341337
}
342338

343-
return $processed;
339+
return $output;
344340
}
345341

346342
/**
347-
* This is where a pattern is matched against $content and the matches
348-
* are replaced by their respective value.
349-
* This function will be called plenty of times, where $content will always
350-
* move up 1 character.
343+
* If $replacement is a callback, execute it, passing in the match data.
344+
* If it's a string, just pass it through.
351345
*
352-
* @param string $pattern Pattern to match
353346
* @param string|callable $replacement Replacement value
354-
* @param string $content Content to match pattern against
347+
* @param array $match Match data, in PREG_OFFSET_CAPTURE form
355348
*
356349
* @return string
357350
*/
358-
protected function replacePattern($pattern, $replacement, $content)
351+
protected function executeReplacement($replacement, $match)
359352
{
360-
if (is_callable($replacement)) {
361-
return preg_replace_callback($pattern, $replacement, $content, 1, $count);
362-
} else {
363-
return preg_replace($pattern, $replacement, $content, 1, $count);
353+
if (!is_callable($replacement)) {
354+
return $replacement;
355+
}
356+
// convert $match from the PREG_OFFSET_CAPTURE form to the form the callback expects
357+
foreach ($match as &$matchItem) {
358+
$matchItem = $matchItem[0];
364359
}
360+
return $replacement($match);
365361
}
366362

367363
/**

0 commit comments

Comments
 (0)