Skip to content

Commit cab57c4

Browse files
authored
Merge pull request #1 from ENM1989/feature/optimize-array-combine
Optimize array_combine with a fast path and fix memory leak
2 parents 0bf2959 + 967b3e4 commit cab57c4

14 files changed

+559
-26
lines changed

benchmark.php

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?php
2+
3+
/**
 * Times 100,000 back-to-back strtoupper() calls on one input string.
 *
 * The monotonic hrtime() clock is used instead of microtime() so that a
 * system clock adjustment during the run cannot distort the measurement.
 *
 * @param string $str Input handed unchanged to every strtoupper() call.
 * @return float Elapsed time for the whole loop, in seconds.
 */
function benchmark_strtoupper(string $str): float {
    $startNs = hrtime(true);
    for ($i = 0; $i < 100000; $i++) {
        // Result is intentionally discarded; only the call cost matters.
        strtoupper($str);
    }
    // hrtime(true) reports nanoseconds; convert to seconds for the caller.
    return (hrtime(true) - $startNs) / 1e9;
}
11+
12+
// Fixtures: the 26-letter alphabet in lower, mixed and upper case, each as a
// short (26-byte) string and as a long string made of 1000 repetitions.
$short_ascii = 'abcdefghijklmnopqrstuvwxyz';
$short_mixed = 'aBcDeFgHiJkLmNoPqRsTuVwXyZ';
$short_upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';

$long_ascii = str_repeat($short_ascii, 1000);
$long_mixed = str_repeat($short_mixed, 1000);
$long_upper = str_repeat($short_upper, 1000);

// Report label => input, listed in the order the results are printed.
$cases = [
    'Short ASCII string' => $short_ascii,
    'Long ASCII string' => $long_ascii,
    'Short mixed-case string' => $short_mixed,
    'Long mixed-case string' => $long_mixed,
    'Short uppercase string' => $short_upper,
    'Long uppercase string' => $long_upper,
];

echo "Benchmarking current strtoupper implementation...\n";

foreach ($cases as $label => $input) {
    echo $label . ': ' . benchmark_strtoupper($input) . "s\n";
}

benchmark_array_combine.php

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<?php
2+
3+
/**
 * Runs a callback ten times in a row and prints the total elapsed time.
 *
 * Prints one line: the label left-aligned in a 40-character column, followed
 * by the duration in seconds with four decimals.
 *
 * Timing uses the monotonic hrtime() clock rather than microtime(), so a
 * system clock adjustment during the run cannot distort the result.
 *
 * @param string   $name Label printed in front of the measurement.
 * @param callable $func Zero-argument callback to measure.
 */
function benchmark(string $name, callable $func): void {
    $startNs = hrtime(true);
    // Repeat the call so one noisy run has less influence on the total.
    for ($run = 0; $run < 10; $run++) {
        $func();
    }
    // hrtime(true) reports nanoseconds; the report is in seconds.
    $elapsed = (hrtime(true) - $startNs) / 1e9;
    printf("%-40s: %8.4f seconds\n", $name, $elapsed);
}
12+
13+
// Element count of every array handed to array_combine().
const LARGE_SIZE = 1000000;

// Scenario 1: keys and values are both integer lists 0..LARGE_SIZE-1.
// This is the shape the optimization targets.
$indexed_keys = range(0, LARGE_SIZE - 1);
$indexed_values = range(0, LARGE_SIZE - 1);

benchmark(
    "Packed Indexed Arrays",
    static fn() => array_combine($indexed_keys, $indexed_values)
);

// Scenario 2: string keys "key_0", "key_1", ... paired with integer values.
// Measured to check for regressions.
$assoc_keys = [];
$assoc_values = [];
for ($n = 0; $n < LARGE_SIZE; ++$n) {
    $assoc_keys[] = "key_" . $n;
    $assoc_values[] = $n;
}

benchmark(
    "Associative Arrays",
    static fn() => array_combine($assoc_keys, $assoc_values)
);

echo "\n";
37+
38+
?>

benchmark_grapheme_str_split.php

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
/**
 * Runs a callback 500 times and prints the total elapsed time.
 *
 * Prints one line: the label padded to 45 characters, then the duration in
 * seconds formatted with six decimals and an "s" suffix.
 *
 * Timing uses the monotonic hrtime() clock rather than microtime(), so a
 * system clock adjustment during the run cannot distort the result.
 *
 * @param string   $name Label printed in front of the measurement.
 * @param callable $func Zero-argument callback to measure.
 */
function benchmark(string $name, callable $func): void {
    // Kept relatively low because grapheme operations can be intensive.
    $iterations = 500;
    $startNs = hrtime(true);
    for ($run = 0; $run < $iterations; $run++) {
        $func();
    }
    // hrtime(true) reports nanoseconds; the report is in seconds.
    $elapsed = (hrtime(true) - $startNs) / 1e9;
    echo str_pad($name, 45) . ": " . number_format($elapsed, 6) . "s\n";
}
13+
14+
echo "Benchmarking current grapheme_str_split implementation...\n";

// Long input mixing ASCII, Cyrillic, a joined emoji sequence and letters with
// combining marks: a sentence of roughly 90 bytes repeated 200 times, i.e.
// about 18 KB of UTF-8.
$long_string = str_repeat("Hello world! Это тест. The quick brown 👨‍👩‍👧‍👦 fox. Á, B́, Ć.", 200);

// ----- Test cases -----

// 1. Plain ASCII letters.
benchmark(
    "Simple ASCII string",
    static fn() => grapheme_str_split("abcdefghijklmnopqrstuvwxyz")
);

// 2. Multi-byte UTF-8 text (Cyrillic alphabet).
benchmark(
    "Multi-byte UTF-8 string (Cyrillic)",
    static fn() => grapheme_str_split("абвгдеёжзийклмнопрстуфхцчшщъыьэюя")
);

// 3. A base letter carrying several combining marks, which together are
//    expected to form a single grapheme.
benchmark(
    "Complex Graphemes (Combining Marks)",
    static fn() => grapheme_str_split("é̄̃")
);

// 4. Emoji sequences (family, woman with skin-tone modifier) that are each
//    expected to form a single grapheme.
benchmark(
    "Complex Graphemes (Emoji)",
    static fn() => grapheme_str_split("👨‍👩‍👧‍👦👩🏽‍💻")
);

// 5. The long mixed-content string built above.
benchmark(
    "Long mixed string",
    static fn() => grapheme_str_split($long_string)
);
47+
48+
?>

benchmark_range.php

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?php
2+
3+
/**
 * Runs a callback once and prints how long it took.
 *
 * Prints one line: the label left-aligned in a 40-character column, followed
 * by the duration in seconds with four decimals.
 *
 * Timing uses the monotonic hrtime() clock rather than microtime(), so a
 * system clock adjustment during the run cannot distort the result.
 *
 * @param string   $name Label printed in front of the measurement.
 * @param callable $func Zero-argument callback to measure.
 */
function benchmark(string $name, callable $func): void {
    $startNs = hrtime(true);
    $func();
    // hrtime(true) reports nanoseconds; the report is in seconds.
    $elapsed = (hrtime(true) - $startNs) / 1e9;
    printf("%-40s: %8.4f seconds\n", $name, $elapsed);
}
9+
10+
// The full-size ranges below hold 10,000,000 elements each, which needs well
// over 100 MB and exceeds PHP's stock memory_limit of 128M. Lift the limit so
// the script does not abort with a fatal "Allowed memory size" error.
ini_set('memory_limit', '-1');

// Upper bounds of the generated ranges.
const LARGE_INT = 10000000;
const LARGE_FLOAT = 10000000.0;

// --- Integer benchmarks ---

// Ascending, default step.
benchmark("Integer Range (1 to 10,000,000)", static fn() => range(1, LARGE_INT));

// Descending, default step.
benchmark("Integer Range (10,000,000 to 1)", static fn() => range(LARGE_INT, 1));

// Ascending, every second value.
benchmark(
    "Integer Range with Step (1 to 10,000,000, step 2)",
    static fn() => range(1, LARGE_INT, 2)
);

// --- Float benchmarks ---

// Float bounds, default step.
benchmark("Float Range (1.0 to 10,000,000.0)", static fn() => range(1.0, LARGE_FLOAT));

// Descending float bounds with a fractional step.
benchmark(
    "Float Range with Step (10,000,000.0 to 1.0, step 2.5)",
    static fn() => range(LARGE_FLOAT, 1.0, 2.5)
);

// --- Character benchmark ---

// Single-character bounds.
benchmark("Character Range ('a' to 'z')", static fn() => range('a', 'z'));

echo "\n";
43+
44+
?>

benchmark_str_decrement.php

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
<?php
2+
3+
/**
 * Runs a callback 20,000 times and prints the total elapsed time.
 *
 * Prints one line of the form "<name>: <seconds>s".
 *
 * Timing uses the monotonic hrtime() clock rather than microtime(), so a
 * system clock adjustment during the run cannot distort the result.
 *
 * @param string   $name Label printed in front of the measurement.
 * @param callable $func Zero-argument callback to measure.
 */
function benchmark(string $name, callable $func): void {
    $startNs = hrtime(true);
    // A smaller iteration count, because string operations can be slow.
    for ($run = 0; $run < 20000; $run++) {
        $func();
    }
    // hrtime(true) reports nanoseconds; the report is in seconds.
    $elapsed = (hrtime(true) - $startNs) / 1e9;
    echo "$name: " . $elapsed . "s\n";
}
12+
13+
echo "Benchmarking current str_decrement implementation...\n";

// Report label => input string, in the order the cases are run.
$cases = [
    // Numeric string whose decrement needs no borrow.
    "Simple numeric string" => "123456789123456789",

    // "1" followed by 50 zeros: full borrow. Key case for the optimization.
    "Long numeric string with full borrow" => "1" . str_repeat("0", 50),

    // Short alphanumeric string, no borrow needed.
    "Simple alphanumeric string" => "abcdefg9",

    // Letter followed by 50 zeros: borrow through the numeric part.
    "Long alphanumeric with numeric borrow" => "b" . str_repeat("0", 50),

    // "z" followed by 50 "a" characters: borrow through letters.
    "Long alphanumeric with letter borrow" => "z" . str_repeat("a", 50),

    // "2" followed by 50 zeros: does not trigger the leading-zero optimization.
    "Long numeric string without leading-zero result" => "2" . str_repeat("0", 50),
];

foreach ($cases as $label => $subject) {
    benchmark($label, function () use ($subject) {
        str_decrement($subject);
    });
}
50+
51+
?>

benchmark_str_ends_with.php

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
<?php
2+
3+
/**
 * Runs a callback 500,000 times and prints the total elapsed time.
 *
 * Prints one line: the label padded to 40 characters, then the duration in
 * seconds formatted with six decimals and an "s" suffix.
 *
 * Timing uses the monotonic hrtime() clock rather than microtime(), so a
 * system clock adjustment during the run cannot distort the result.
 *
 * @param string   $name Label printed in front of the measurement.
 * @param callable $func Zero-argument callback to measure.
 */
function benchmark(string $name, callable $func): void {
    $startNs = hrtime(true);
    // The measured function is very fast, so a high iteration count is needed.
    for ($run = 0; $run < 500000; $run++) {
        $func();
    }
    // hrtime(true) reports nanoseconds; the report is in seconds.
    $elapsed = (hrtime(true) - $startNs) / 1e9;
    echo str_pad($name, 40) . ": " . number_format($elapsed, 6) . "s\n";
}
12+
13+
echo "Benchmarking current str_ends_with implementation...\n";

// Shared fixtures: one long haystack plus two long needles that differ only in
// their final character, so one matches the haystack's tail and one does not.
$long_haystack = "This is a very long string that is used for benchmarking purposes to see how the function performs with a significant amount of data to process.";
$long_needle_match = "a significant amount of data to process.";
$long_needle_no_match = "a significant amount of data to process!";

// 1. Needle longer than the haystack (fast path).
benchmark(
    "Haystack shorter than needle",
    static fn() => str_ends_with("short", "this is much longer")
);

// 2. Short needle that matches.
benchmark(
    "Matching short needle",
    static fn() => str_ends_with($long_haystack, "process.")
);

// 3. Short needle that does not match.
benchmark(
    "Non-matching short needle",
    static fn() => str_ends_with($long_haystack, "process!")
);

// 4. Long needle that matches.
benchmark(
    "Matching long needle",
    static fn() => str_ends_with($long_haystack, $long_needle_match)
);

// 5. Long needle that does not match.
benchmark(
    "Non-matching long needle",
    static fn() => str_ends_with($long_haystack, $long_needle_no_match)
);

// 6. Empty needle (edge case).
benchmark(
    "Matching empty needle",
    static fn() => str_ends_with($long_haystack, "")
);
48+
49+
?>

benchmark_str_increment.php

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<?php
2+
3+
/**
 * Runs a callback 20,000 times and prints the total elapsed time.
 *
 * Prints one line of the form "<name>: <seconds>s".
 *
 * Timing uses the monotonic hrtime() clock rather than microtime(), so a
 * system clock adjustment during the run cannot distort the result.
 *
 * @param string   $name Label printed in front of the measurement.
 * @param callable $func Zero-argument callback to measure.
 */
function benchmark(string $name, callable $func): void {
    $startNs = hrtime(true);
    for ($run = 0; $run < 20000; $run++) {
        $func();
    }
    // hrtime(true) reports nanoseconds; the report is in seconds.
    $elapsed = (hrtime(true) - $startNs) / 1e9;
    echo "$name: " . $elapsed . "s\n";
}
11+
12+
echo "Benchmarking current str_increment implementation...\n";

// Report label => input string, in the order the cases are run.
$cases = [
    // Numeric string whose increment needs no carry.
    "Simple numeric string" => "123456788",

    // Fifty nines: full carry. Key case for the optimization.
    "Long numeric string with full carry" => str_repeat("9", 50),

    // Short alphanumeric string, no carry needed.
    "Simple alphanumeric string" => "abcde8",

    // Letter followed by 50 nines: carry through the numeric part.
    "Long alphanumeric with numeric carry" => "a" . str_repeat("9", 50),

    // "z" followed by 50 nines: carry through both digits and the letter.
    "Long alphanumeric with full carry" => "z" . str_repeat("9", 50),

    // "8" followed by 50 nines: long input that does not trigger reallocation.
    "Long numeric string without reallocation" => "8" . str_repeat("9", 50),
];

foreach ($cases as $label => $subject) {
    benchmark($label, function () use ($subject) {
        str_increment($subject);
    });
}
49+
50+
?>

benchmark_str_ireplace.php

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?php
2+
3+
/**
 * Runs a callback 500 times and prints the total elapsed time.
 *
 * Prints one line: the label padded to 50 characters, then the duration in
 * seconds formatted with six decimals and an "s" suffix.
 *
 * Timing uses the monotonic hrtime() clock rather than microtime(), so a
 * system clock adjustment during the run cannot distort the result.
 *
 * @param string   $name Label printed in front of the measurement.
 * @param callable $func Zero-argument callback to measure.
 */
function benchmark(string $name, callable $func): void {
    $startNs = hrtime(true);
    // Low iteration count: str_ireplace can be very slow with arrays on long strings.
    for ($run = 0; $run < 500; $run++) {
        $func();
    }
    // hrtime(true) reports nanoseconds; the report is in seconds.
    $elapsed = (hrtime(true) - $startNs) / 1e9;
    echo str_pad($name, 50) . ": " . number_format($elapsed, 6) . "s\n";
}
12+
13+
echo "Benchmarking current str_ireplace implementation...\n";

// Haystack: a 45-byte pangram sentence repeated 500 times (~22KB).
$long_haystack = str_repeat("The quick brown fox jumps over the lazy dog. ", 500);

// Search terms that all occur in the haystack, with one replacement per term.
$search_array = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog'];
$replace_array = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'];

// Search terms that never occur in the haystack.
$search_array_no_match = ['xylophone', 'yak', 'zebra', 'walrus', 'vulture', 'unicorn', 'tiger', 'snake'];

// ----- Test cases -----

// 1. Baseline: scalar search and scalar replacement.
benchmark(
    "Simple string search",
    static fn() => str_ireplace('fox', 'cat', $long_haystack)
);

// 2. Array of search terms, one shared replacement (primary optimization target).
benchmark(
    "Array search, single replacement",
    static fn() => str_ireplace($search_array, 'REPLACED', $long_haystack)
);

// 3. Array of search terms, array of replacements (primary optimization target).
benchmark(
    "Array search, array replacement",
    static fn() => str_ireplace($search_array, $replace_array, $long_haystack)
);

// 4. Array of search terms with no matches, to catch performance regressions.
benchmark(
    "Array search, no matches",
    static fn() => str_ireplace($search_array_no_match, 'REPLACED', $long_haystack)
);
46+
47+
?>

benchmark_str_pad.php

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
<?php
2+
3+
/**
 * Times 100,000 back-to-back str_pad() calls with fixed arguments.
 *
 * The monotonic hrtime() clock is used instead of microtime() so that a
 * system clock adjustment during the run cannot distort the measurement.
 *
 * @param string $input      String to pad.
 * @param int    $pad_length Target length passed to str_pad().
 * @param string $pad_string Padding string passed to str_pad().
 * @param int    $pad_type   One of the STR_PAD_* constants.
 * @return float Elapsed time for the whole loop, in seconds.
 */
function benchmark_str_pad(string $input, int $pad_length, string $pad_string, int $pad_type): float {
    $startNs = hrtime(true);
    for ($i = 0; $i < 100000; $i++) {
        // Result is intentionally discarded; only the call cost matters.
        str_pad($input, $pad_length, $pad_string, $pad_type);
    }
    // hrtime(true) reports nanoseconds; convert to seconds for the caller.
    return (hrtime(true) - $startNs) / 1e9;
}
11+
12+
// Inputs: a 5-byte string and the same string repeated 100 times (500 bytes).
$short_string = 'Hello';
$long_string = str_repeat('Hello', 100);

// One row per measurement: [label, input, target length, pad string, pad type].
$runs = [
    // Short input, single-space padding up to 100 characters.
    ['Short string, right pad', $short_string, 100, ' ', STR_PAD_RIGHT],
    ['Short string, left pad', $short_string, 100, ' ', STR_PAD_LEFT],
    ['Short string, both pad', $short_string, 100, ' ', STR_PAD_BOTH],

    // Long input, single-space padding up to 1000 characters.
    ['Long string, right pad', $long_string, 1000, ' ', STR_PAD_RIGHT],
    ['Long string, left pad', $long_string, 1000, ' ', STR_PAD_LEFT],
    ['Long string, both pad', $long_string, 1000, ' ', STR_PAD_BOTH],

    // Short input, two-character padding string.
    ['Short string, multi-char pad, right', $short_string, 100, '-=', STR_PAD_RIGHT],
    ['Short string, multi-char pad, left', $short_string, 100, '-=', STR_PAD_LEFT],
    ['Short string, multi-char pad, both', $short_string, 100, '-=', STR_PAD_BOTH],
];

echo "Benchmarking current str_pad implementation...\n";

foreach ($runs as [$label, $input, $length, $pad, $type]) {
    echo $label . ': ' . benchmark_str_pad($input, $length, $pad, $type) . "s\n";
}

0 commit comments

Comments
 (0)