Skip to content

Commit 72144bf

Browse files
committed
build: add performance testing and validation infrastructure
Add comprehensive scripts for performance measurement, validation, and controlled comparison testing of DBAL bulk import optimization. Scripts added: - generate-test-xliff.php: Create test files (50KB, 1MB, 10MB, 100MB) - controlled-comparison-test.sh: Branch comparison with clean database - run-simple-performance-test.sh: Quick performance validation - run-performance-tests.sh: Comprehensive benchmark suite - test-real-import-performance.php: Real-world import testing - direct-import-test.php: Direct ImportService testing - analyze-cachegrind.py: XDebug profiling analysis Testing infrastructure enables: - Reproducible performance measurements - Branch comparison validation (main vs optimized) - Automated controlled testing with database reset - Performance regression detection Used to validate 6-33x performance improvement claims.
1 parent 0b63b4e commit 72144bf

9 files changed

+12916
-0
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Analyze Xdebug cachegrind profile to identify performance bottlenecks
4+
"""
5+
6+
import sys
7+
import gzip
8+
import re
9+
from collections import defaultdict
10+
11+
def parse_cachegrind(filepath):
    """Parse a cachegrind file and aggregate the time cost per function.

    Args:
        filepath: Path to a cachegrind output file; files ending in ``.gz``
            are transparently decompressed.

    Returns:
        defaultdict mapping function name -> accumulated cost in 10ns units.
    """
    function_costs = defaultdict(int)
    current_function = None

    # gzip.open in 'rt' mode behaves like a plain text open(), so pick the
    # opener once and let a with-statement close the file (replaces the
    # original manual try/finally).
    opener = gzip.open if filepath.endswith('.gz') else open
    with opener(filepath, 'rt') as f:
        for line in f:
            # Function definition: fn=(number) ClassName::methodName
            if line.startswith('fn='):
                match = re.search(r'fn=\(\d+\) (.+)$', line)
                if match:
                    current_function = match.group(1)
            # Cost line: "<line-number> <time> ..." — starts with a digit.
            elif line and line[0].isdigit() and current_function:
                parts = line.strip().split()
                if len(parts) >= 2:
                    # Second column is the self cost in Time_(10ns) units.
                    try:
                        function_costs[current_function] += int(parts[1])
                    except ValueError:
                        # Non-numeric cost column — skip the line.
                        pass

    return function_costs
46+
47+
def format_time(nanoseconds):
    """Render a cost given in 10ns units as a human-readable duration."""
    total_ns = nanoseconds * 10

    # Sub-microsecond values are printed as a whole number of nanoseconds.
    if total_ns < 1000:
        return f"{total_ns}ns"

    # Everything else gets two decimal places in the largest fitting unit.
    if total_ns < 1_000_000:
        scaled, unit = total_ns / 1000, "μs"
    elif total_ns < 1_000_000_000:
        scaled, unit = total_ns / 1_000_000, "ms"
    else:
        scaled, unit = total_ns / 1_000_000_000, "s"
    return f"{scaled:.2f}{unit}"
59+
60+
def main():
    """Command-line entry point: print a cost report for one cachegrind file."""
    if len(sys.argv) < 2:
        print("Usage: analyze-cachegrind.py <cachegrind.out.file>")
        sys.exit(1)

    filepath = sys.argv[1]

    print(f"Analyzing {filepath}...")
    print()

    function_costs = parse_cachegrind(filepath)
    total_cost = sum(function_costs.values())

    # Guard against an empty or unparseable profile: the percentage maths
    # below would otherwise raise ZeroDivisionError.
    if total_cost == 0:
        print("No costs found - is this a valid cachegrind file?")
        sys.exit(1)

    _print_top_functions(function_costs, total_cost)
    _print_category_breakdown(function_costs, total_cost)


def _print_top_functions(function_costs, total_cost):
    """Print the 50 most expensive functions, sorted by descending cost."""
    sorted_functions = sorted(function_costs.items(), key=lambda x: x[1], reverse=True)

    print(f"Total Time: {format_time(total_cost)}")
    print()
    print("Top 50 Most Expensive Functions:")
    print("=" * 100)
    print(f"{'% Time':<8} {'Time':<15} {'Function':<75}")
    print("=" * 100)

    for func_name, cost in sorted_functions[:50]:
        percentage = (cost / total_cost) * 100
        print(f"{percentage:>6.2f}% {format_time(cost):<15} {func_name[:75]}")

    print()
    print("=" * 100)


def _print_category_breakdown(function_costs, total_cost):
    """Aggregate function costs into coarse categories and print the split."""
    # Substring keywords that map a function name to a category;
    # the first matching category wins.
    categories = {
        'Database': ['PDOStatement', 'Connection::execute', 'Query::execute', 'QueryBuilder', 'Repository::find', 'Repository::add', 'Repository::update', 'PersistenceManager'],
        'XML Parsing': ['XMLReader', 'SimpleXML', 'DOMDocument', 'parseXliff'],
        'Import Service': ['ImportService', 'importFile', 'importEntry'],
        'Extbase/ORM': ['DataMapper', 'Persistence', 'ReflectionService', 'ObjectManager'],
        'TYPO3 Core': ['Bootstrap', 'DependencyInjection', 'EventDispatcher'],
    }

    category_costs = defaultdict(int)
    uncat_cost = 0

    for func_name, cost in function_costs.items():
        # for/else: the else arm runs only when no category matched.
        for cat_name, keywords in categories.items():
            if any(keyword in func_name for keyword in keywords):
                category_costs[cat_name] += cost
                break
        else:
            uncat_cost += cost

    print()
    print("Time Distribution by Category:")
    print("=" * 100)

    # Sort categories by cost, most expensive first.
    for cat_name, cost in sorted(category_costs.items(), key=lambda x: x[1], reverse=True):
        percentage = (cost / total_cost) * 100
        print(f"{cat_name:<30} {percentage:>6.2f}% {format_time(cost)}")

    if uncat_cost > 0:
        percentage = (uncat_cost / total_cost) * 100
        print(f"{'Other':<30} {percentage:>6.2f}% {format_time(uncat_cost)}")

    print("=" * 100)
131+
# Standard script entry-point guard: run only when executed directly.
if __name__ == '__main__':
    main()
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#!/bin/bash
#
# Controlled Performance Comparison: main vs feature/async-import-queue
# Imports ${TEST_FILE} on each branch into a clean database and records timings.
#
set -e

PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
REPORT_FILE="${PROJECT_ROOT}/claudedocs/Controlled-Comparison-Results.md"
TEST_FILE="test_10mb.textdb_import.xlf"
TEST_SOURCE="${PROJECT_ROOT}/Build/test-data/${TEST_FILE}"
# IMPORTANT: Copy to project root Resources, not vendor (vendor/netresearch/nr-textdb is symlink to /var/www/nr_textdb in container)
FILE_DEST="${PROJECT_ROOT}/Resources/Private/Language/${TEST_FILE}"

echo "========================================"
echo "Controlled Performance Comparison Test"
echo "========================================"
echo ""
echo "Test File: ${TEST_FILE}"
echo "Branches: main vs feature/async-import-queue"
echo ""

# Save current branch so it can be restored after the comparison.
ORIGINAL_BRANCH=$(git branch --show-current)
echo "Current branch: ${ORIGINAL_BRANCH}"
echo ""

# Stash uncommitted changes, but only when there actually are any.
# BUG FIX: "git stash push" exits 0 even on a clean tree, so the previous
# "STASHED=$?" check was always 0 and the later "git stash pop" could pop a
# stash this script never created. STASHED=0 still means "we stashed",
# matching the check at the end of the script.
if [ -n "$(git status --porcelain)" ]; then
    echo "Stashing uncommitted changes..."
    git stash push -u -m "Controlled comparison test - temporary stash" > /dev/null 2>&1
    STASHED=0
else
    STASHED=1
fi
echo ""

# Initialize report.
# BUG FIX: the heredoc delimiter was quoted ('EOF'), so "$(date ...)" was
# written literally instead of being expanded; the delimiter is now unquoted.
# The test-file name also comes from ${TEST_FILE} instead of a stale
# hard-coded "test_50kb" value that no longer matched the actual test file.
cat > "${REPORT_FILE}" <<EOF
# Controlled Performance Comparison Results

**Date**: $(date +'%Y-%m-%d %H:%M:%S')
**Test File**: ${TEST_FILE}
**Environment**: DDEV (WSL2), MySQL 8.0, TYPO3 v13.4

## Test Methodology

1. Clear database completely
2. Copy test file to vendor location
3. Run \`vendor/bin/typo3 nr_textdb:import\`
4. Record time and import statistics
5. Repeat for each branch

---

EOF
54+
# Run one timed import on the given branch and append results to the report.
#   $1 - git branch name to test
# Uses globals: PROJECT_ROOT, TEST_SOURCE, FILE_DEST, REPORT_FILE.
run_test() {
    local branch=$1
    # Declared separately from assignment so a failing command substitution
    # is not masked by "local"'s own exit status (shellcheck SC2155).
    local output start end duration imported updated
    echo "Testing branch: ${branch}"

    # Switch branch
    git checkout "${branch}" > /dev/null 2>&1

    # Sync code to vendor (important! the vendor copy is what TYPO3 executes)
    rsync -a --delete \
        --exclude vendor \
        --exclude .git \
        "${PROJECT_ROOT}/" \
        "${PROJECT_ROOT}/vendor/netresearch/nr-textdb/"

    # Clear TYPO3 cache
    ddev exec "rm -rf /var/www/html/v13/var/cache/*" > /dev/null 2>&1

    # Clear database so every run starts from an identical empty state
    echo " Clearing database..."
    ddev exec "mysql -e 'TRUNCATE TABLE tx_nrtextdb_domain_model_translation; TRUNCATE TABLE tx_nrtextdb_domain_model_component; TRUNCATE TABLE tx_nrtextdb_domain_model_type; TRUNCATE TABLE tx_nrtextdb_domain_model_environment;'" > /dev/null 2>&1

    # Copy test file to project Resources (maps to /var/www/nr_textdb in container)
    cp "${TEST_SOURCE}" "${FILE_DEST}"

    # Run import with wall-clock timing (%N gives sub-second precision)
    echo " Running import..."
    start=$(date +%s.%N)
    output=$(ddev exec "cd /var/www/html/v13 && vendor/bin/typo3 nr_textdb:import 2>&1")
    end=$(date +%s.%N)

    # bc handles the fractional-seconds arithmetic bash cannot do natively
    duration=$(echo "${end} - ${start}" | bc)

    # Extract statistics from the command output; default to 0 when absent
    imported=$(echo "${output}" | grep -oP 'Imported: \K\d+' || echo "0")
    updated=$(echo "${output}" | grep -oP 'Updated: \K\d+' || echo "0")

    # Clean up test file
    rm -f "${FILE_DEST}"

    # Report results
    echo " Results: ${imported} imported in ${duration}s"
    echo ""

    # Append to report (unquoted delimiter: variables expand here by design)
    cat >> "${REPORT_FILE}" <<EOF
## Branch: ${branch}

\`\`\`
Imported: ${imported} records
Updated: ${updated} records
Duration: ${duration}s
Throughput: $(echo "scale=2; ${imported} / ${duration}" | bc) records/second
\`\`\`

EOF
}
113+
114+
# Run tests on both branches
echo "Starting tests..."
echo ""

run_test "main"
run_test "feature/async-import-queue"

# Restore original branch
git checkout "${ORIGINAL_BRANCH}" > /dev/null 2>&1

# Restore stashed changes if any were stashed (STASHED=0 means "we stashed")
if [ $STASHED -eq 0 ]; then
    echo ""
    echo "Restoring uncommitted changes..."
    git stash pop > /dev/null 2>&1
fi

# Add comparison section to report.
# BUG FIX: the heredoc delimiter was quoted ('EOF'), which wrote the literal
# string "$(date ...)" into the report instead of the actual completion time.
cat >> "${REPORT_FILE}" <<EOF
## Analysis

[Analysis will be added after test completion]

---

**Test completed**: $(date +'%Y-%m-%d %H:%M:%S')
EOF

echo "========================================"
echo "Test Complete!"
echo "========================================"
echo ""
echo "Report saved to: ${REPORT_FILE}"
echo ""
cat "${REPORT_FILE}"

0 commit comments

Comments
 (0)