Skip to content

Commit 4ab0203

Browse files
committed
test: Add performance benchmarking infrastructure
Add comprehensive performance testing tools and documentation for measuring import throughput, identifying bottlenecks, and validating optimization efforts. **Performance Test Scripts:** - run-performance-tests.sh: Automated benchmark suite - run-simple-performance-test.sh: Quick smoke tests - test-real-import-performance.php: Production-like scenarios - direct-import-test.php: Isolated import logic testing - generate-test-xliff.php: Test data generator (configurable sizes) - generate-textdb-import.php: Alternative test data format **Profiling Tools:** - analyze-cachegrind.py: Xdebug profile analysis - Automated function-level performance breakdown - Hotspot identification and reporting **Test Data:** - test-sample-1mb.xlf: Baseline test file - Generators for various data sizes (1K, 10K, 100K records) - Realistic translation structure **Documentation:** - ADR-001-DBAL-Bulk-Import.rst: Architecture decision record - Documents problem analysis and solution rationale - Includes before/after performance metrics - Justifies bulk DBAL approach over ORM **Measurement Capabilities:** - Throughput (records/second) - Memory usage profiling - Execution time breakdown - Database query analysis - Bottleneck identification **Usage:** ```bash # Quick test ./Build/scripts/run-simple-performance-test.sh # Full benchmark suite ./Build/scripts/run-performance-tests.sh # Generate large test file php Build/scripts/generate-test-xliff.php 50000 ``` Enables data-driven optimization decisions. Provides regression testing for performance changes.
1 parent 0a91528 commit 4ab0203

File tree

9 files changed

+13032
-0
lines changed

9 files changed

+13032
-0
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Analyze Xdebug cachegrind profile to identify performance bottlenecks
4+
"""
5+
6+
import sys
7+
import gzip
8+
import re
9+
from collections import defaultdict
10+
11+
def parse_cachegrind(filepath):
    """Parse a cachegrind profile and sum the time cost per function.

    Handles plain and gzip-compressed files, and both forms of the
    cachegrind name-compression syntax:

        fn=(1) ClassName::methodName   # defines id 1 and names it
        fn=(1)                         # later reference to id 1 by number only

    The original implementation only matched the first form, so cost lines
    following a reference-only ``fn=(N)`` were attributed to whatever
    function happened to be current — this version resolves references
    through an id -> name map.

    :param filepath: path to a cachegrind output file (optionally ``.gz``)
    :return: dict mapping function name to summed cost in 10ns units
    """
    function_costs = defaultdict(int)
    fn_names = {}           # name-compression table: id -> function name
    current_function = None

    # gzip.open in 'rt' mode transparently decompresses; plain open otherwise.
    opener = gzip.open if filepath.endswith('.gz') else open
    with opener(filepath, 'rt') as f:
        for line in f:
            if line.startswith('fn='):
                # Full definition: fn=(id) name — record and switch to it.
                match = re.match(r'fn=\((\d+)\)\s+(.+?)\s*$', line)
                if match:
                    fn_names[match.group(1)] = match.group(2)
                    current_function = match.group(2)
                else:
                    # Reference-only form: fn=(id) — look the name up.
                    ref = re.match(r'fn=\((\d+)\)\s*$', line)
                    current_function = fn_names.get(ref.group(1)) if ref else None
            # Cost line: "<src-line> <time> ..." — first value after the
            # source line number is Time in 10ns units.
            elif line and line[0].isdigit() and current_function:
                parts = line.split()
                if len(parts) >= 2:
                    try:
                        function_costs[current_function] += int(parts[1])
                    except ValueError:
                        pass

    return function_costs
46+
47+
def format_time(nanoseconds):
    """Render a cost given in 10-nanosecond units as a readable duration.

    Sub-microsecond values are shown as whole nanoseconds; larger values
    are scaled to μs, ms, or s with two decimal places.
    """
    ns = nanoseconds * 10
    if ns < 1_000:
        return f"{ns}ns"
    # Walk up the unit ladder; each unit covers three orders of magnitude.
    for divisor, unit in ((1_000, "μs"), (1_000_000, "ms")):
        if ns < divisor * 1_000:
            return f"{ns / divisor:.2f}{unit}"
    return f"{ns / 1_000_000_000:.2f}s"
59+
60+
def main():
    """CLI entry point: analyze a cachegrind profile and print cost reports.

    Usage: analyze-cachegrind.py <cachegrind.out.file>

    Prints the 50 most expensive functions, then a coarse breakdown of
    time by subsystem category (database, XML parsing, ORM, ...).
    Exits with status 1 on missing argument or an empty profile.
    """
    if len(sys.argv) < 2:
        print("Usage: analyze-cachegrind.py <cachegrind.out.file>")
        sys.exit(1)

    filepath = sys.argv[1]

    print(f"Analyzing {filepath}...")
    print()

    function_costs = parse_cachegrind(filepath)

    # Calculate total
    total_cost = sum(function_costs.values())

    # Fix: an empty or unparseable profile previously crashed with
    # ZeroDivisionError in the percentage calculations below.
    if total_cost == 0:
        print("No cost data found in profile.")
        sys.exit(1)

    # Sort by cost (descending)
    sorted_functions = sorted(function_costs.items(), key=lambda x: x[1], reverse=True)

    print(f"Total Time: {format_time(total_cost)}")
    print()
    print("Top 50 Most Expensive Functions:")
    print("=" * 100)
    print(f"{'% Time':<8} {'Time':<15} {'Function':<75}")
    print("=" * 100)

    for func_name, cost in sorted_functions[:50]:
        percentage = (cost / total_cost) * 100
        print(f"{percentage:>6.2f}% {format_time(cost):<15} {func_name[:75]}")

    print()
    print("=" * 100)

    # Aggregate by category: a function counts toward the FIRST category
    # whose keyword appears anywhere in its qualified name.
    print()
    print("Time Distribution by Category:")
    print("=" * 100)

    categories = {
        'Database': ['PDOStatement', 'Connection::execute', 'Query::execute', 'QueryBuilder', 'Repository::find', 'Repository::add', 'Repository::update', 'PersistenceManager'],
        'XML Parsing': ['XMLReader', 'SimpleXML', 'DOMDocument', 'parseXliff'],
        'Import Service': ['ImportService', 'importFile', 'importEntry'],
        'Extbase/ORM': ['DataMapper', 'Persistence', 'ReflectionService', 'ObjectManager'],
        'TYPO3 Core': ['Bootstrap', 'DependencyInjection', 'EventDispatcher'],
    }

    category_costs = defaultdict(int)
    uncat_cost = 0

    for func_name, cost in function_costs.items():
        for cat_name, keywords in categories.items():
            if any(keyword in func_name for keyword in keywords):
                category_costs[cat_name] += cost
                break
        else:
            # No category keyword matched — bucket under "Other".
            uncat_cost += cost

    # Sort categories by cost
    sorted_cats = sorted(category_costs.items(), key=lambda x: x[1], reverse=True)

    for cat_name, cost in sorted_cats:
        percentage = (cost / total_cost) * 100
        print(f"{cat_name:<30} {percentage:>6.2f}% {format_time(cost)}")

    if uncat_cost > 0:
        percentage = (uncat_cost / total_cost) * 100
        print(f"{'Other':<30} {percentage:>6.2f}% {format_time(uncat_cost)}")

    print("=" * 100)


if __name__ == '__main__':
    main()
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/usr/bin/env php
<?php

/**
 * This file is part of the package netresearch/nr-textdb.
 *
 * For the full copyright and license information, please read the
 * LICENSE file that was distributed with this source code.
 */

declare(strict_types=1);

/**
 * Direct Import Performance Test
 * Bootstrap TYPO3 and measure REAL ImportService performance.
 */

// Change to TYPO3 root
chdir('/var/www/html/v13');

// Bootstrap TYPO3 CLI
putenv('TYPO3_CONTEXT=Development');

// NOTE(review): this require is redundant — the autoloader is loaded again
// below to capture the $classLoader instance. Harmless (require caches),
// but one of the two could be dropped.
require '/var/www/html/v13/vendor/autoload.php';

use TYPO3\CMS\Core\Core\Bootstrap;
use TYPO3\CMS\Core\Core\SystemEnvironmentBuilder;
use TYPO3\CMS\Core\Utility\GeneralUtility;

// Bootstrap (proper TYPO3 v13 pattern): build the environment for a CLI
// request, then initialize the full framework with the composer class loader.
$classLoader = require '/var/www/html/v13/vendor/autoload.php';
SystemEnvironmentBuilder::run(1, SystemEnvironmentBuilder::REQUESTTYPE_CLI);
Bootstrap::init($classLoader, true);

// Get container and ImportService with dependency injection
$container = GeneralUtility::getContainer();
$importService = $container->get(Netresearch\NrTextdb\Service\ImportService::class);
38+
39+
// Helper functions
40+
function formatTime(float $seconds): string
{
    // Pick the unit that keeps the number readable: whole milliseconds
    // below one second, seconds below a minute, minutes otherwise.
    switch (true) {
        case $seconds < 1:
            return sprintf('%.0f ms', $seconds * 1000);
        case $seconds < 60:
            return sprintf('%.2f sec', $seconds);
        default:
            return sprintf('%.2f min', $seconds / 60);
    }
}
51+
52+
function formatBytes(int $bytes): string
{
    // Human-readable byte count. The digit-count heuristic approximates
    // log1024 (3 decimal digits per 1024 step).
    $units = ['B', 'KB', 'MB', 'GB'];
    $factor = (int) floor((strlen((string) $bytes) - 1) / 3);

    // Fix: values >= ~1 TB produced a factor of 4+, reading past the end
    // of $units (undefined index). Clamp to the largest available unit.
    $factor = min($factor, count($units) - 1);

    return sprintf('%.2f %s', $bytes / pow(1024, $factor), $units[$factor]);
}
59+
60+
// Test file: first CLI argument, or the bundled 1 MB sample by default.
$testFile = $argv[1] ?? '/var/www/nr_textdb/Build/scripts/test-sample-1mb.xlf';

if (!file_exists($testFile)) {
    echo "ERROR: Test file not found: $testFile\n";
    exit(1);
}

$fileSize = filesize($testFile);
$fileName = basename($testFile);

// Fix: the separator character was lost (str_repeat('', 100) yields an
// empty string, so no banner was printed). '=' / '─' restore visible rules,
// matching the box-drawing style used in the RESULTS block below.
echo "\n" . str_repeat('=', 100) . "\n";
echo "REAL IMPORT PERFORMANCE TEST - OPTIMIZED CODE\n";
echo str_repeat('=', 100) . "\n\n";
echo "File: $fileName\n";
echo 'Size: ' . formatBytes($fileSize) . "\n";
echo "Branch: feature/optimize-import-performance (WITH OPTIMIZATIONS)\n\n";

// Counters filled by reference by ImportService::importFile().
$imported = 0;
$updated = 0;
$errors = [];

// Start timing
$startTime = microtime(true);
$startMemory = memory_get_usage();

try {
    // REAL import with actual database operations
    $importService->importFile(
        $testFile,
        false, // forceUpdate
        $imported,
        $updated,
        $errors
    );

    $endTime = microtime(true);
    $endMemory = memory_get_usage();
    $peakMemory = memory_get_peak_usage();

    $duration = $endTime - $startTime;
    $memoryUsed = $endMemory - $startMemory;

    echo '┌─ RESULTS ─' . str_repeat('─', 87) . "\n";
    echo " ✅ Import completed successfully\n";
    echo sprintf(" Time: %s\n", formatTime($duration));
    echo sprintf(" Memory used: %s\n", formatBytes($memoryUsed));
    echo sprintf(" Peak memory: %s\n", formatBytes($peakMemory));
    echo sprintf(" Imported: %d records\n", $imported);
    echo sprintf(" Updated: %d records\n", $updated);
    // max() guards against division by zero on sub-millisecond runs.
    echo sprintf(" Throughput: %.0f trans-units/sec\n", ($imported + $updated) / max(0.001, $duration));

    if (!empty($errors)) {
        echo sprintf(" ⚠️ Errors: %d\n", count($errors));
        // Show at most the first five errors to keep output readable.
        foreach (array_slice($errors, 0, 5) as $error) {
            echo " - $error\n";
        }
    }
    echo '└' . str_repeat('─', 99) . "\n\n";
} catch (\Throwable $e) {
    // Fix: catch \Throwable, not just Exception, so PHP Errors
    // (TypeError etc.) are also reported instead of terminating silently.
    echo '❌ ERROR: ' . $e->getMessage() . "\n";
    echo 'Trace: ' . $e->getTraceAsString() . "\n";
    exit(1);
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/usr/bin/env php
<?php

/**
 * This file is part of the package netresearch/nr-textdb.
 *
 * For the full copyright and license information, please read the
 * LICENSE file that was distributed with this source code.
 */

declare(strict_types=1);
/**
 * Generate test XLIFF files of various sizes for performance testing.
 *
 * Usage: php generate-test-xliff.php
 * Output: Creates test files in Build/test-data/
 */
// Output directory lives next to this script's parent: Build/test-data.
$outputDir = __DIR__ . '/../test-data';
if (!is_dir($outputDir)) {
    // Create recursively with standard directory permissions.
    mkdir($outputDir, 0755, true);
}
22+
23+
/**
 * Generate XLIFF file with specified number of trans-units.
 *
 * Writes a minimal XLIFF 1.0 document to $filename and returns that path.
 * When $language is 'en' the <target> repeats the <source>; any other
 * language value gets a fixed German-looking sample string instead.
 * NOTE(review): the whole document is built in memory before writing —
 * for the 100 MB size this peaks at roughly the file size in RAM.
 */
function generateXliffFile(string $filename, int $transUnitCount, string $language = 'en'): string
{
    // Document header: XLIFF 1.0 with a single <file> element.
    $xliff = <<<XML
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<xliff version="1.0">
<file source-language="en" datatype="plaintext" original="messages" date="2025-01-15T12:00:00Z" product-name="test">
<header/>
<body>

XML;

    // Generate trans-units
    for ($i = 1; $i <= $transUnitCount; ++$i) {
        // id format "component|type|placeholder" — presumably what the
        // textdb importer expects; verify against ImportService.
        $id = sprintf('test_component|test_type|label_%06d', $i);
        $source = "Test Label {$i} - This is a test translation string for performance testing";
        $target = ($language === 'en')
            ? $source
            : "Testübersetzung {$i} - Dies ist ein Test-Übersetzungsstring für Leistungstests";

        $xliff .= <<<XML
<trans-unit id="{$id}" xml:space="preserve">
<source>{$source}</source>
<target>{$target}</target>
</trans-unit>

XML;
    }

    // Close body/file/xliff and flush the document to disk in one write.
    $xliff .= <<<XML
</body>
</file>
</xliff>
XML;

    file_put_contents($filename, $xliff);

    return $filename;
}
64+
65+
/**
 * Calculate number of trans-units needed for target file size.
 *
 * Based on an average trans-unit of ~250 bytes plus ~500 bytes of fixed
 * XML header/footer overhead. The result is clamped to at least 1 so
 * target sizes at or below the overhead still yield a usable test file
 * (previously such inputs returned zero or a negative count).
 */
function calculateTransUnits(int $targetBytes): int
{
    // Average trans-unit size is ~250 bytes
    $avgTransUnitSize = 250;
    $overhead = 500; // XML header/footer overhead

    return max(1, (int) (($targetBytes - $overhead) / $avgTransUnitSize));
}
76+
77+
// Define test file sizes. Keys become part of the output filename; values
// are trans-unit counts derived from the target byte size.
// NOTE(review): the 100mb entry builds a ~100 MB string in memory before
// writing — expect a correspondingly high peak memory for that run.
$testFiles = [
    '50kb' => calculateTransUnits(50 * 1024), // ~200 trans-units
    '1mb' => calculateTransUnits(1 * 1024 * 1024), // ~4,000 trans-units
    '10mb' => calculateTransUnits(10 * 1024 * 1024), // ~40,000 trans-units
    '100mb' => calculateTransUnits(100 * 1024 * 1024), // ~400,000 trans-units
];

echo "Generating test XLIFF files...\n\n";

foreach ($testFiles as $size => $count) {
    // ".textdb_import.xlf" suffix marks the file for the import tooling.
    $filename = "{$outputDir}/test_{$size}.textdb_import.xlf";

    echo sprintf('Creating %s file with %s trans-units...', str_pad($size, 6), number_format($count));

    // Time each generation so slow sizes are visible in the output.
    $start = microtime(true);
    generateXliffFile($filename, $count);
    $elapsed = microtime(true) - $start;

    $actualSize = filesize($filename);
    $actualSizeMB = round($actualSize / 1024 / 1024, 2);

    echo sprintf(" ✓ (%s MB in %.2fs)\n", $actualSizeMB, $elapsed);
}

echo "\nTest files created in: {$outputDir}\n";
echo "\nFiles ready for import testing:\n";
// Summary listing: re-stat each generated file for its actual size.
foreach ($testFiles as $size => $count) {
    $filename = "{$outputDir}/test_{$size}.textdb_import.xlf";
    echo sprintf(" - test_%s.textdb_import.xlf (%s trans-units, %s MB)\n",
        $size,
        number_format($count),
        round(filesize($filename) / 1024 / 1024, 2)
    );
}

0 commit comments

Comments
 (0)