Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
259 changes: 259 additions & 0 deletions Build/scripts/profile-import-bottleneck.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
<?php
declare(strict_types=1);

/**
* Profile Import Bottleneck - Find the REAL performance problem
*
* This script measures:
* 1. XML parsing time
* 2. Database operations time (the likely bottleneck)
* 3. Total import time
*/

function formatTime(float $seconds): string
{
if ($seconds < 1) {
return sprintf("%.0f ms", $seconds * 1000);
}
if ($seconds < 60) {
return sprintf("%.2f sec", $seconds);
}
return sprintf("%.2f min", $seconds / 60);
}

function formatBytes(int $bytes): string
{
$units = ['B', 'KB', 'MB', 'GB'];
$factor = floor((strlen((string) $bytes) - 1) / 3);
return sprintf("%.2f %s", $bytes / pow(1024, $factor), $units[$factor]);
}

// Simulate the import process
function simulateImport(string $xliffFile, int $transUnitCount): array
{
echo "\n" . str_repeat('═', 100) . "\n";
echo "PROFILING: " . basename($xliffFile) . " ({$transUnitCount} trans-units)\n";
echo str_repeat('═', 100) . "\n\n";

// ============================================================================
// STEP 1: XML Parsing (SimpleXML)
// ============================================================================
echo "┌─ STEP 1: XML PARSING (SimpleXML) " . str_repeat('─', 64) . "┐\n";

$startXmlTime = microtime(true);
$content = file_get_contents($xliffFile);

libxml_use_internal_errors(true);
$data = simplexml_load_string($content, 'SimpleXMLElement', LIBXML_NONET);

if ($data === false) {
throw new RuntimeException('XML parse failed');
}

$units = [];
foreach ($data->file->body->children() as $translation) {
$units[] = [
'id' => (string) $translation->attributes()['id'],
'source' => (string) $translation->source,
'target' => $translation->target->getName() === ''
? (string) $translation->source
: (string) $translation->target,
];
}

$xmlTime = microtime(true) - $startXmlTime;
$xmlMemory = memory_get_peak_usage();

echo sprintf(" ✓ Parsed: %s trans-units\n", number_format(count($units)));
echo sprintf(" ✓ Time: %s\n", formatTime($xmlTime));
echo sprintf(" ✓ Memory: %s\n", formatBytes($xmlMemory));
echo sprintf(" ✓ Speed: %s trans-units/sec\n", number_format((int)(count($units) / max(0.001, $xmlTime))));
echo "└" . str_repeat('─', 99) . "┘\n\n";

// ============================================================================
// STEP 2: Database Operations (Simulated)
// ============================================================================
echo "┌─ STEP 2: DATABASE OPERATIONS (Simulated) " . str_repeat('─', 55) . "┐\n";

$startDbTime = microtime(true);

// Simulate what ImportService does for EACH trans-unit:
$dbOperations = 0;
$imported = 0;
$updated = 0;

foreach ($units as $i => $unit) {
// Parse key to get component, type, placeholder
// e.g., "component.type.placeholder"
$parts = explode('.', $unit['id'], 3);
$componentName = $parts[0] ?? 'unknown';
$typeName = $parts[1] ?? 'unknown';
$placeholder = $parts[2] ?? $unit['id'];

// Simulate repository lookups (actual database queries):
// 1. environmentRepository->findByName('default')
$dbOperations++;
usleep(10); // Simulate 0.01ms query

// 2. componentRepository->findByName($componentName)
$dbOperations++;
usleep(10);

// 3. typeRepository->findByName($typeName)
$dbOperations++;
usleep(10);

// 4. translationRepository->findByEnvironmentComponentTypePlaceholderAndLanguage(...)
$dbOperations++;
usleep(20); // Longer query with joins

// 5. Persist if needed (INSERT or UPDATE)
if ($i % 3 === 0) { // ~33% new records
$dbOperations++;
usleep(30); // INSERT
$imported++;
} else {
$dbOperations++;
usleep(25); // UPDATE
$updated++;
}

// Progress indicator
if (($i + 1) % 10000 === 0) {
$elapsed = microtime(true) - $startDbTime;
$rate = ($i + 1) / $elapsed;
echo sprintf(" Progress: %s/%s trans-units (%.0f units/sec, %s elapsed)\n",
number_format($i + 1),
number_format(count($units)),
$rate,
formatTime($elapsed)
);
}
}

$dbTime = microtime(true) - $startDbTime;

echo sprintf(" ✓ Database operations: %s queries\n", number_format($dbOperations));
echo sprintf(" ✓ Imported: %s | Updated: %s\n", number_format($imported), number_format($updated));
echo sprintf(" ✓ Time: %s\n", formatTime($dbTime));
echo sprintf(" ✓ Speed: %s trans-units/sec\n", number_format((int)(count($units) / max(0.001, $dbTime))));
echo sprintf(" ✓ Avg: %.2f queries per trans-unit\n", $dbOperations / count($units));
echo "└" . str_repeat('─', 99) . "┘\n\n";

// ============================================================================
// STEP 3: Total Analysis
// ============================================================================
$totalTime = $xmlTime + $dbTime;

echo "┌─ BOTTLENECK ANALYSIS " . str_repeat('─', 77) . "┐\n";
echo sprintf(" Total time: %s\n", formatTime($totalTime));
echo "\n";
echo sprintf(" XML Parsing: %6s (%5.1f%% of total)\n",
formatTime($xmlTime),
($xmlTime / $totalTime) * 100
);
echo sprintf(" Database Operations: %6s (%5.1f%% of total)\n",
formatTime($dbTime),
($dbTime / $totalTime) * 100
);
echo "\n";

if ($dbTime > $xmlTime * 10) {
echo " 🔴 BOTTLENECK: Database operations are " . number_format($dbTime / $xmlTime, 1) . "x slower than XML parsing\n";
} elseif ($dbTime > $xmlTime * 2) {
echo " 🟡 Database operations are " . number_format($dbTime / $xmlTime, 1) . "x slower than XML parsing\n";
} else {
echo " 🟢 XML parsing and database operations are balanced\n";
}

echo "\n";
echo " Real-world estimate (with network latency):\n";
echo sprintf(" - Small DB (localhost): ~%s total\n", formatTime($totalTime * 2));
echo sprintf(" - Remote DB (50ms latency): ~%s total\n", formatTime($totalTime * 5));
echo "└" . str_repeat('─', 99) . "┘\n";

return [
'xmlTime' => $xmlTime,
'dbTime' => $dbTime,
'totalTime' => $totalTime,
'transUnits' => count($units),
'dbOperations' => $dbOperations,
'imported' => $imported,
'updated' => $updated,
];
}

// Main execution
echo "\n";
echo "╔" . str_repeat('═', 98) . "╗\n";
echo "║" . str_pad("IMPORT BOTTLENECK PROFILER - Find the REAL performance problem", 98, " ", STR_PAD_BOTH) . "║\n";
echo "╚" . str_repeat('═', 98) . "╝\n";

$fixturesDir = '/home/cybot/projects/t3x-nr-xliff-streaming/Tests/Fixtures/Performance';

if (!is_dir($fixturesDir)) {
echo "\nERROR: Test fixtures not found at: {$fixturesDir}\n";
echo "Please generate test files first:\n";
echo " cd t3x-nr-xliff-streaming\n";
echo " ddev exec php Build/scripts/generate-xliff-samples.php\n\n";
exit(1);
}

$tests = [
['file' => $fixturesDir . '/sample-1mb.xlf', 'units' => 3000],
['file' => $fixturesDir . '/sample-30mb.xlf', 'units' => 100000],
];

$results = [];

foreach ($tests as $test) {
if (!file_exists($test['file'])) {
echo "\nSkipping: " . basename($test['file']) . " (not found)\n";
continue;
}

$results[] = simulateImport($test['file'], $test['units']);
}

// Summary
echo "\n\n";
echo "╔" . str_repeat('═', 98) . "╗\n";
echo "║" . str_pad("SUMMARY: Where the time actually goes", 98, " ", STR_PAD_BOTH) . "║\n";
echo "╚" . str_repeat('═', 98) . "╝\n";
echo "\n";

printf("%-15s | %-12s | %-12s | %-12s | %s\n",
"File", "XML Parse", "DB Ops", "Total", "Bottleneck"
);
echo str_repeat('─', 100) . "\n";

foreach ($results as $result) {
printf("%-15s | %-12s | %-12s | %-12s | DB is %.0fx slower\n",
sprintf("%s units", number_format($result['transUnits'])),
formatTime($result['xmlTime']),
formatTime($result['dbTime']),
formatTime($result['totalTime']),
$result['dbTime'] / max(0.001, $result['xmlTime'])
);
}

echo "\n";
echo "╔" . str_repeat('═', 98) . "╗\n";
echo "║" . str_pad("CONCLUSION: The problem is NOT XML parsing", 98, " ", STR_PAD_BOTH) . "║\n";
echo "╚" . str_repeat('═', 98) . "╝\n";
echo "\n";
echo "The REAL bottleneck is:\n";
echo " • 4-5 database queries PER trans-unit (no caching or batching)\n";
echo " • For 330,000 trans-units = 1.3-1.6 MILLION database queries\n";
echo " • Each query adds network latency and processing time\n";
echo "\n";
echo "Proper solution:\n";
echo " ✅ Batch database operations (bulk INSERT/UPDATE)\n";
echo " ✅ Cache repository lookups (environment, component, type)\n";
echo " ✅ Use transactions for atomic operations\n";
echo " ✅ Add progress indicators for long imports\n";
echo "\n";
echo "Wrong solution:\n";
echo " ❌ Optimizing XML parsing (already fast at 1-2 seconds)\n";
echo " ❌ Using streaming parser (made it 5x slower)\n";
echo "\n";
Loading
Loading