Skip to content

Commit 72144bf

Browse files
committed
build: add performance testing and validation infrastructure
Add comprehensive scripts for performance measurement, validation, and controlled comparison testing of DBAL bulk import optimization. Scripts added: - generate-test-xliff.php: Create test files (50KB, 1MB, 10MB, 100MB) - controlled-comparison-test.sh: Branch comparison with clean database - run-simple-performance-test.sh: Quick performance validation - run-performance-tests.sh: Comprehensive benchmark suite - test-real-import-performance.php: Real-world import testing - direct-import-test.php: Direct ImportService testing - analyze-cachegrind.py: XDebug profiling analysis Testing infrastructure enables: - Reproducible performance measurements - Branch comparison validation (main vs optimized) - Automated controlled testing with database reset - Performance regression detection Used to validate 6-33x performance improvement claims.
1 parent 0b63b4e commit 72144bf

9 files changed

+12916
-0
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Analyze Xdebug cachegrind profile to identify performance bottlenecks
4+
"""
5+
6+
import sys
7+
import gzip
8+
import re
9+
from collections import defaultdict
10+
11+
def parse_cachegrind(filepath):
    """Parse a cachegrind file and aggregate the time cost per function.

    Args:
        filepath: Path to a cachegrind output file; files ending in ``.gz``
            are transparently decompressed.

    Returns:
        defaultdict mapping function name -> accumulated cost in 10ns units.
    """
    function_costs = defaultdict(int)
    current_function = None

    # gzip.open in 'rt' mode behaves like a plain text open(), so pick the
    # opener once and let a with-statement close the file (replaces the
    # original manual try/finally).
    opener = gzip.open if filepath.endswith('.gz') else open
    with opener(filepath, 'rt') as f:
        for line in f:
            # Function definition: fn=(number) ClassName::methodName
            if line.startswith('fn='):
                match = re.search(r'fn=\(\d+\) (.+)$', line)
                if match:
                    current_function = match.group(1)
            # Cost line: "<line-number> <time> ..." — starts with a digit.
            elif line and line[0].isdigit() and current_function:
                parts = line.strip().split()
                if len(parts) >= 2:
                    # Second column is the self cost in Time_(10ns) units.
                    try:
                        function_costs[current_function] += int(parts[1])
                    except ValueError:
                        # Non-numeric cost column — skip the line.
                        pass

    return function_costs
46+
47+
def format_time(nanoseconds):
    """Render a cost given in 10ns units as a human-readable duration."""
    total_ns = nanoseconds * 10

    # Sub-microsecond values are printed as a whole number of nanoseconds.
    if total_ns < 1000:
        return f"{total_ns}ns"

    # Everything else gets two decimal places in the largest fitting unit.
    if total_ns < 1_000_000:
        scaled, unit = total_ns / 1000, "μs"
    elif total_ns < 1_000_000_000:
        scaled, unit = total_ns / 1_000_000, "ms"
    else:
        scaled, unit = total_ns / 1_000_000_000, "s"
    return f"{scaled:.2f}{unit}"
59+
60+
def main():
    """Command-line entry point: print a cost report for one cachegrind file."""
    if len(sys.argv) < 2:
        print("Usage: analyze-cachegrind.py <cachegrind.out.file>")
        sys.exit(1)

    filepath = sys.argv[1]

    print(f"Analyzing {filepath}...")
    print()

    function_costs = parse_cachegrind(filepath)
    total_cost = sum(function_costs.values())

    # Guard against an empty or unparseable profile: the percentage maths
    # below would otherwise raise ZeroDivisionError.
    if total_cost == 0:
        print("No costs found - is this a valid cachegrind file?")
        sys.exit(1)

    _print_top_functions(function_costs, total_cost)
    _print_category_breakdown(function_costs, total_cost)


def _print_top_functions(function_costs, total_cost):
    """Print the 50 most expensive functions, sorted by descending cost."""
    sorted_functions = sorted(function_costs.items(), key=lambda x: x[1], reverse=True)

    print(f"Total Time: {format_time(total_cost)}")
    print()
    print("Top 50 Most Expensive Functions:")
    print("=" * 100)
    print(f"{'% Time':<8} {'Time':<15} {'Function':<75}")
    print("=" * 100)

    for func_name, cost in sorted_functions[:50]:
        percentage = (cost / total_cost) * 100
        print(f"{percentage:>6.2f}% {format_time(cost):<15} {func_name[:75]}")

    print()
    print("=" * 100)


def _print_category_breakdown(function_costs, total_cost):
    """Aggregate function costs into coarse categories and print the split."""
    # Substring keywords that map a function name to a category;
    # the first matching category wins.
    categories = {
        'Database': ['PDOStatement', 'Connection::execute', 'Query::execute', 'QueryBuilder', 'Repository::find', 'Repository::add', 'Repository::update', 'PersistenceManager'],
        'XML Parsing': ['XMLReader', 'SimpleXML', 'DOMDocument', 'parseXliff'],
        'Import Service': ['ImportService', 'importFile', 'importEntry'],
        'Extbase/ORM': ['DataMapper', 'Persistence', 'ReflectionService', 'ObjectManager'],
        'TYPO3 Core': ['Bootstrap', 'DependencyInjection', 'EventDispatcher'],
    }

    category_costs = defaultdict(int)
    uncat_cost = 0

    for func_name, cost in function_costs.items():
        # for/else: the else arm runs only when no category matched.
        for cat_name, keywords in categories.items():
            if any(keyword in func_name for keyword in keywords):
                category_costs[cat_name] += cost
                break
        else:
            uncat_cost += cost

    print()
    print("Time Distribution by Category:")
    print("=" * 100)

    # Sort categories by cost, most expensive first.
    for cat_name, cost in sorted(category_costs.items(), key=lambda x: x[1], reverse=True):
        percentage = (cost / total_cost) * 100
        print(f"{cat_name:<30} {percentage:>6.2f}% {format_time(cost)}")

    if uncat_cost > 0:
        percentage = (uncat_cost / total_cost) * 100
        print(f"{'Other':<30} {percentage:>6.2f}% {format_time(uncat_cost)}")

    print("=" * 100)
131+
# Standard script entry-point guard: run only when executed directly.
if __name__ == '__main__':
    main()
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#!/bin/bash
#
# Controlled Performance Comparison: main vs feature/async-import-queue
# Imports ${TEST_FILE} on each branch into a clean database and records timings.
#
set -e

PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
REPORT_FILE="${PROJECT_ROOT}/claudedocs/Controlled-Comparison-Results.md"
TEST_FILE="test_10mb.textdb_import.xlf"
TEST_SOURCE="${PROJECT_ROOT}/Build/test-data/${TEST_FILE}"
# IMPORTANT: Copy to project root Resources, not vendor (vendor/netresearch/nr-textdb is symlink to /var/www/nr_textdb in container)
FILE_DEST="${PROJECT_ROOT}/Resources/Private/Language/${TEST_FILE}"

echo "========================================"
echo "Controlled Performance Comparison Test"
echo "========================================"
echo ""
echo "Test File: ${TEST_FILE}"
echo "Branches: main vs feature/async-import-queue"
echo ""

# Save current branch so it can be restored after the comparison.
ORIGINAL_BRANCH=$(git branch --show-current)
echo "Current branch: ${ORIGINAL_BRANCH}"
echo ""

# Stash uncommitted changes, but only when there actually are any.
# BUG FIX: "git stash push" exits 0 even on a clean tree, so the previous
# "STASHED=$?" check was always 0 and the later "git stash pop" could pop a
# stash this script never created. STASHED=0 still means "we stashed",
# matching the check at the end of the script.
if [ -n "$(git status --porcelain)" ]; then
    echo "Stashing uncommitted changes..."
    git stash push -u -m "Controlled comparison test - temporary stash" > /dev/null 2>&1
    STASHED=0
else
    STASHED=1
fi
echo ""

# Initialize report.
# BUG FIX: the heredoc delimiter was quoted ('EOF'), so "$(date ...)" was
# written literally instead of being expanded; the delimiter is now unquoted.
# The test-file name also comes from ${TEST_FILE} instead of a stale
# hard-coded "test_50kb" value that no longer matched the actual test file.
cat > "${REPORT_FILE}" <<EOF
# Controlled Performance Comparison Results

**Date**: $(date +'%Y-%m-%d %H:%M:%S')
**Test File**: ${TEST_FILE}
**Environment**: DDEV (WSL2), MySQL 8.0, TYPO3 v13.4

## Test Methodology

1. Clear database completely
2. Copy test file to vendor location
3. Run \`vendor/bin/typo3 nr_textdb:import\`
4. Record time and import statistics
5. Repeat for each branch

---

EOF
54+
# Run one timed import on the given branch and append results to the report.
#   $1 - git branch name to test
# Uses globals: PROJECT_ROOT, TEST_SOURCE, FILE_DEST, REPORT_FILE.
run_test() {
    local branch=$1
    # Declared separately from assignment so a failing command substitution
    # is not masked by "local"'s own exit status (shellcheck SC2155).
    local output start end duration imported updated
    echo "Testing branch: ${branch}"

    # Switch branch
    git checkout "${branch}" > /dev/null 2>&1

    # Sync code to vendor (important! the vendor copy is what TYPO3 executes)
    rsync -a --delete \
        --exclude vendor \
        --exclude .git \
        "${PROJECT_ROOT}/" \
        "${PROJECT_ROOT}/vendor/netresearch/nr-textdb/"

    # Clear TYPO3 cache
    ddev exec "rm -rf /var/www/html/v13/var/cache/*" > /dev/null 2>&1

    # Clear database so every run starts from an identical empty state
    echo " Clearing database..."
    ddev exec "mysql -e 'TRUNCATE TABLE tx_nrtextdb_domain_model_translation; TRUNCATE TABLE tx_nrtextdb_domain_model_component; TRUNCATE TABLE tx_nrtextdb_domain_model_type; TRUNCATE TABLE tx_nrtextdb_domain_model_environment;'" > /dev/null 2>&1

    # Copy test file to project Resources (maps to /var/www/nr_textdb in container)
    cp "${TEST_SOURCE}" "${FILE_DEST}"

    # Run import with wall-clock timing (%N gives sub-second precision)
    echo " Running import..."
    start=$(date +%s.%N)
    output=$(ddev exec "cd /var/www/html/v13 && vendor/bin/typo3 nr_textdb:import 2>&1")
    end=$(date +%s.%N)

    # bc handles the fractional-seconds arithmetic bash cannot do natively
    duration=$(echo "${end} - ${start}" | bc)

    # Extract statistics from the command output; default to 0 when absent
    imported=$(echo "${output}" | grep -oP 'Imported: \K\d+' || echo "0")
    updated=$(echo "${output}" | grep -oP 'Updated: \K\d+' || echo "0")

    # Clean up test file
    rm -f "${FILE_DEST}"

    # Report results
    echo " Results: ${imported} imported in ${duration}s"
    echo ""

    # Append to report (unquoted delimiter: variables expand here by design)
    cat >> "${REPORT_FILE}" <<EOF
## Branch: ${branch}

\`\`\`
Imported: ${imported} records
Updated: ${updated} records
Duration: ${duration}s
Throughput: $(echo "scale=2; ${imported} / ${duration}" | bc) records/second
\`\`\`

EOF
}
113+
114+
# Run tests on both branches
echo "Starting tests..."
echo ""

run_test "main"
run_test "feature/async-import-queue"

# Restore original branch
git checkout "${ORIGINAL_BRANCH}" > /dev/null 2>&1

# Restore stashed changes if any were stashed (STASHED=0 means "we stashed")
if [ $STASHED -eq 0 ]; then
    echo ""
    echo "Restoring uncommitted changes..."
    git stash pop > /dev/null 2>&1
fi

# Add comparison section to report.
# BUG FIX: the heredoc delimiter was quoted ('EOF'), which wrote the literal
# string "$(date ...)" into the report instead of the actual completion time.
cat >> "${REPORT_FILE}" <<EOF
## Analysis

[Analysis will be added after test completion]

---

**Test completed**: $(date +'%Y-%m-%d %H:%M:%S')
EOF

echo "========================================"
echo "Test Complete!"
echo "========================================"
echo ""
echo "Report saved to: ${REPORT_FILE}"
echo ""
cat "${REPORT_FILE}"

0 commit comments

Comments
 (0)