Skip to content

Commit 0c22560

Browse files
authored
Merge pull request #40 from houdaslassi/feature/optimized-tag-aggregation
Feature/optimized tag aggregation
2 parents 1e0458e + fd13b9d commit 0c22560

File tree

7 files changed

+1007
-155
lines changed

7 files changed

+1007
-155
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
<?php
2+
3+
use Illuminate\Database\Migrations\Migration;
4+
use Illuminate\Database\Schema\Blueprint;
5+
use Illuminate\Support\Facades\Schema;
6+
7+
return new class extends Migration
{
    /**
     * Get the database connection for the migration.
     *
     * Returns the `vantage.database_connection` config value. When it is
     * null, Schema::connection(null) resolves to the default connection.
     */
    public function getConnection(): ?string
    {
        return config('vantage.database_connection');
    }

    /**
     * Run the migrations.
     *
     * Creates the denormalized `vantage_job_tags` table, holding one row per
     * (job_id, tag) pair. Tag statistics can then be computed by the database
     * with an indexed GROUP BY instead of decoding every job's JSON tag array
     * in application code, which becomes prohibitively slow on high-volume
     * installations (100k+ jobs).
     */
    public function up(): void
    {
        Schema::connection($this->getConnection())
            ->create('vantage_job_tags', function (Blueprint $table) {
                $table->id();
                $table->unsignedBigInteger('job_id');
                $table->string('tag', 255);
                $table->timestamp('created_at')->nullable();

                // The composite (tag, created_at) index also serves lookups and
                // GROUP BYs on `tag` alone (leftmost prefix), so a separate
                // single-column index on `tag` would only add write and storage
                // overhead.
                $table->index(['tag', 'created_at'], 'idx_vantage_job_tags_tag_created');
                $table->index(['job_id'], 'idx_vantage_job_tags_job_id');

                // Tag rows are removed automatically when their job is deleted.
                $table->foreign('job_id')
                    ->references('id')
                    ->on('vantage_jobs')
                    ->onDelete('cascade');
            });
    }

    /**
     * Reverse the migrations by dropping the `vantage_job_tags` table.
     */
    public function down(): void
    {
        Schema::connection($this->getConnection())
            ->dropIfExists('vantage_job_tags');
    }
};
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
<?php
2+
3+
namespace HoudaSlassi\Vantage\Console\Commands;
4+
5+
use HoudaSlassi\Vantage\Models\VantageJob;
6+
use HoudaSlassi\Vantage\Support\TagAggregator;
7+
use Illuminate\Console\Command;
8+
use Illuminate\Support\Facades\DB;
9+
10+
/**
 * Artisan command that rebuilds the denormalized `vantage_job_tags` table
 * from the `job_tags` column of existing VantageJob records.
 */
class BackfillJobTags extends Command
{
    /**
     * Maximum rows per INSERT statement; keeps each statement small enough to
     * avoid MySQL max_allowed_packet issues.
     */
    private const INSERT_BATCH_SIZE = 500;

    /**
     * Maximum number of distinct insert error messages echoed in the summary.
     */
    private const MAX_REPORTED_ERRORS = 5;

    protected $signature = 'vantage:backfill-tags
                            {--days= : Only backfill jobs from the last X days (default: all)}
                            {--chunk=1000 : Number of jobs to process per batch}
                            {--force : Skip confirmation prompt}';

    protected $description = 'Backfill the vantage_job_tags table from existing jobs for optimized tag queries';

    /**
     * Execute the backfill.
     *
     * Steps: verify the tags table exists, validate the options, optionally
     * clear previously backfilled rows (all of them, or only those inside the
     * --days window), then walk the matching jobs in chunks and insert one
     * row per non-empty string tag.
     *
     * @return int self::SUCCESS when everything was inserted (or there was
     *             nothing to do / the user cancelled); self::FAILURE when the
     *             table is missing, an option is invalid, or at least one
     *             insert batch failed.
     */
    public function handle(): int
    {
        $aggregator = new TagAggregator;

        // Check if tags table exists
        if (! $aggregator->hasTagsTable()) {
            $this->error('The vantage_job_tags table does not exist.');
            $this->line('');
            $this->line('Run the migration first:');
            $this->line('  php artisan migrate');
            $this->line('');
            $this->line('Or publish and run Vantage migrations:');
            $this->line('  php artisan vendor:publish --tag=vantage-migrations');
            $this->line('  php artisan migrate');

            return self::FAILURE;
        }

        // Validate --days explicitly: a truthiness check would silently turn
        // "--days=0" into "all time" and let garbage values match nothing.
        $days = null;
        $rawDays = $this->option('days');
        if ($rawDays !== null && $rawDays !== '') {
            $days = filter_var($rawDays, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
            if ($days === false) {
                $this->error('The --days option must be a positive integer.');

                return self::FAILURE;
            }
        }

        $chunkSize = filter_var($this->option('chunk'), FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
        if ($chunkSize === false) {
            $this->error('The --chunk option must be a positive integer.');

            return self::FAILURE;
        }

        $force = (bool) $this->option('force');

        // Compute the cutoff once so the rows that get cleared and the jobs
        // that get re-inserted are selected with exactly the same boundary.
        $cutoff = $days !== null ? now()->subDays($days) : null;

        // Build query for jobs to backfill
        $query = VantageJob::whereNotNull('job_tags');

        if ($cutoff !== null) {
            $query->where('created_at', '>', $cutoff);
            $period = "from the last {$days} days";
        } else {
            $period = 'all time';
        }

        // Count jobs to process
        $totalJobs = $query->count();

        if ($totalJobs === 0) {
            $this->info('No jobs with tags found to backfill.');

            return self::SUCCESS;
        }

        // Check if already populated
        $existingCount = $this->tagsTable()->count();

        if ($existingCount > 0) {
            $this->warn("The vantage_job_tags table already contains {$existingCount} records.");
            $this->line('');

            if (! $force && ! $this->confirm('Do you want to clear existing records and re-backfill?', false)) {
                $this->info('Backfill cancelled. Existing data preserved.');
                $this->line('');
                $this->line('Options:');
                $this->line('  - Use --force to overwrite without confirmation');
                $this->line('  - Use --days=X to limit the rebuild to recent jobs (only that window is cleared)');

                return self::SUCCESS;
            }

            // Clear existing records matching the time range
            $this->info('Clearing existing tag records...');
            if ($cutoff !== null) {
                $this->tagsTable()
                    ->where('created_at', '>', $cutoff)
                    ->delete();
            } else {
                $this->tagsTable()->truncate();
            }
        }

        $this->info("Backfilling tags for {$totalJobs} jobs ({$period})...");
        $this->line("Processing in chunks of {$chunkSize}...");
        $this->line('');

        $bar = $this->output->createProgressBar($totalJobs);
        $bar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
        $bar->setMessage('Starting...');
        $bar->start();

        $processed = 0;
        $tagsInserted = 0;
        $errors = 0;
        $errorMessages = [];

        // chunkById pages with "WHERE id > last_id" instead of OFFSET, so each
        // page stays cheap no matter how deep into the table we are.
        $query->select(['id', 'job_tags', 'created_at'])
            ->chunkById($chunkSize, function ($jobs) use (&$processed, &$tagsInserted, &$errors, &$errorMessages, $bar) {
                $processed += count($jobs);

                // Each INSERT batch is guarded separately so that one failing
                // batch does not discard the remaining batches of this chunk.
                foreach (array_chunk($this->buildTagRows($jobs), self::INSERT_BATCH_SIZE) as $batch) {
                    try {
                        $this->tagsTable()->insert($batch);
                        $tagsInserted += count($batch);
                    } catch (\Throwable $e) {
                        $errors++;

                        // Keep a few distinct messages for the summary rather
                        // than swallowing the failure silently.
                        $message = $e->getMessage();
                        if (count($errorMessages) < self::MAX_REPORTED_ERRORS
                            && ! in_array($message, $errorMessages, true)) {
                            $errorMessages[] = $message;
                        }
                    }
                }

                $bar->setMessage("{$tagsInserted} tags inserted");
                $bar->setProgress($processed);
            });

        $bar->finish();
        $this->line('');
        $this->line('');

        // Summary
        if ($errors === 0) {
            $this->info('✓ Backfill completed!');
        } else {
            $this->warn("Backfill finished with {$errors} failed insert batch(es).");
            foreach ($errorMessages as $message) {
                $this->line("  - {$message}");
            }
        }
        $this->line('');
        $this->table(
            ['Metric', 'Value'],
            [
                ['Jobs processed', number_format($processed)],
                ['Tags inserted', number_format($tagsInserted)],
                ['Avg tags per job', $processed > 0 ? round($tagsInserted / $processed, 2) : 0],
                ['Errors', $errors],
            ]
        );

        // Show optimization tip
        if ($aggregator->supportsEfficientJsonOperations()) {
            $this->line('');
            $this->info('✓ Your database now supports efficient tag aggregation!');
            $this->line('  Dashboard tag queries will be ~100x faster for large datasets.');
        } else {
            $this->line('');
            $this->warn('Note: Your database driver does not support efficient JSON operations.');
            $this->line('  The tags table will be used as a fallback for fast aggregation.');
        }

        // A partially failed backfill leaves the tags table incomplete, so
        // signal that to the caller through the exit code.
        return $errors === 0 ? self::SUCCESS : self::FAILURE;
    }

    /**
     * Convert a chunk of jobs into insertable `vantage_job_tags` rows.
     *
     * Jobs whose `job_tags` value is empty or not an array are skipped, as
     * are tags that are not strings or are blank after trimming.
     *
     * @param  iterable  $jobs  Jobs exposing `id`, `job_tags` and `created_at`.
     * @return array<int, array<string, mixed>>
     */
    private function buildTagRows(iterable $jobs): array
    {
        $rows = [];

        foreach ($jobs as $job) {
            if (empty($job->job_tags) || ! is_array($job->job_tags)) {
                continue;
            }

            foreach ($job->job_tags as $tag) {
                if (! is_string($tag)) {
                    continue;
                }

                $tag = trim($tag);
                if ($tag === '') {
                    continue;
                }

                $rows[] = [
                    'job_id' => $job->id,
                    'tag' => $tag,
                    // Reuse the job's timestamp so time-windowed tag queries
                    // line up with the job they belong to.
                    'created_at' => $job->created_at,
                ];
            }
        }

        return $rows;
    }

    /**
     * Start a fresh query builder for the `vantage_job_tags` table on the
     * configured Vantage connection.
     */
    private function tagsTable(): \Illuminate\Database\Query\Builder
    {
        return DB::connection($this->getConnectionName())->table('vantage_job_tags');
    }

    /**
     * Get the database connection name.
     *
     * Returns the `vantage.database_connection` config value; null selects
     * the application's default connection.
     */
    protected function getConnectionName(): ?string
    {
        return config('vantage.database_connection');
    }
}

src/Console/Commands/PruneOldJobs.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace HoudaSlassi\Vantage\Console\Commands;
44

55
use HoudaSlassi\Vantage\Models\VantageJob;
6+
use HoudaSlassi\Vantage\Support\TagAggregator;
67
use Illuminate\Console\Command;
78
use Illuminate\Support\Facades\DB;
89

@@ -139,6 +140,15 @@ public function handle(): int
139140

140141
$this->info("\nSuccessfully pruned {$deleted} job record(s) older than {$period}.");
141142

143+
// Also prune the denormalized tags table if it exists
144+
$tagAggregator = new TagAggregator;
145+
if ($tagAggregator->hasTagsTable()) {
146+
$tagsDeleted = $tagAggregator->pruneOldTags($cutoff);
147+
if ($tagsDeleted > 0) {
148+
$this->line("Also pruned {$tagsDeleted} tag record(s) from vantage_job_tags table.");
149+
}
150+
}
151+
142152
// Show remaining stats
143153
$remaining = VantageJob::count();
144154
$this->line('Remaining jobs in database: '.number_format($remaining));

0 commit comments

Comments
 (0)