Skip to content

Commit a2e19c6

Browse files
authored
⚡ optimize Prometheus metrics retrieval (#4589)
1 parent 42db9df commit a2e19c6

File tree

4 files changed

+186
-96
lines changed

4 files changed

+186
-96
lines changed

app/Providers/PrometheusServiceProvider.php

Lines changed: 101 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@
1919
use Spatie\Prometheus\Facades\Prometheus;
2020

2121
const PROM_JOB_SCRAPER_SEPARATOR = '-PROM-JOB-SCRAPER-SEPARATOR-';
22+
23+
/** Cache TTL in seconds for slow-to-compute Prometheus metrics (600 s = 10 minutes). */
24+
const PROM_CACHE_TTL = 600;
25+
26+
/** Cache TTL in seconds for frequently changing metrics such as en-route counts (120 s = 2 minutes). */
27+
const PROM_CACHE_TTL_SHORT = 120;
28+
2229
class PrometheusServiceProvider extends ServiceProvider
2330
{
2431
/**
@@ -52,19 +59,18 @@ public function register(): void
5259

5360
public static function getJobsByDisplayName(string $tableName): array
5461
{
55-
$counts = DB::table($tableName)
56-
->get(['queue', 'payload'])
57-
->map(fn ($row) => [
58-
'queue' => $row->queue,
59-
'displayName' => json_decode($row->payload)->displayName])
60-
->countBy(fn ($job) => $job['displayName'] . PROM_JOB_SCRAPER_SEPARATOR . $job['queue'])
62+
// Use SQL JSON extraction instead of fetching all payload columns to PHP
63+
// and decoding them one by one.
64+
$jsonExtract = DB::getDriverName() === 'sqlite'
65+
? "json_extract(payload, '$.displayName')"
66+
: "JSON_UNQUOTE(JSON_EXTRACT(payload, '$.displayName'))";
67+
68+
return DB::table($tableName)
69+
->selectRaw("count(*) AS total, queue, {$jsonExtract} AS display_name")
70+
->groupBy('queue', 'display_name')
71+
->get()
72+
->map(fn ($row) => [$row->total, [$row->display_name, $row->queue]])
6173
->toArray();
62-
63-
return array_map(
64-
fn ($jobProperties, $count) => [$count, explode(PROM_JOB_SCRAPER_SEPARATOR, $jobProperties)],
65-
array_keys($counts),
66-
array_values($counts)
67-
);
6874
}
6975

7076
private function getHafasByType(array $getFailures): array
@@ -116,13 +122,13 @@ public function metaDataStats(): void
116122
Prometheus::addGauge('Stations count')
117123
->helpText('How many stations exist in the database?')
118124
->value(function () {
119-
return Station::count();
125+
return Cache::remember('prom_station_count', PROM_CACHE_TTL, fn () => Station::count());
120126
});
121127

122128
Prometheus::addGauge('Station identifiers count')
123129
->helpText('How many station identifiers exist in the database?')
124130
->value(function () {
125-
return StationIdentifier::count();
131+
return Cache::remember('prom_station_identifier_count', PROM_CACHE_TTL, fn () => StationIdentifier::count());
126132
});
127133

128134
Prometheus::addGauge('Users count')
@@ -149,57 +155,65 @@ public function metaDataStats(): void
149155
->helpText('How many hafas trips are posted grouped by operator and mode of transport?')
150156
->labels(['operator', 'category'])
151157
->value(function () {
152-
return Trip::groupBy('operator_id', 'category')
153-
->selectRaw('count(*) AS total, operator_id, category')
154-
->with('operator')
155-
->get()
156-
->map(fn ($item) => [$item->total, [$item->operator?->name, $item->category]])
157-
->toArray();
158+
return Cache::remember('prom_trips_by_operator_category', PROM_CACHE_TTL, function () {
159+
return Trip::leftJoin('hafas_operators', 'hafas_trips.operator_id', '=', 'hafas_operators.id')
160+
->groupBy('hafas_trips.operator_id', 'hafas_trips.category')
161+
->selectRaw('count(*) AS total, MAX(hafas_operators.name) AS operator_name, hafas_trips.category')
162+
->get()
163+
->map(fn ($item) => [$item->total, [$item->operator_name, $item->category]])
164+
->toArray();
165+
});
158166
});
159167

160168
Prometheus::addGauge('Trip Source count')
161169
->helpText('How many hafas trips are posted grouped by source?')
162170
->label('source')
163171
->value(function () {
164-
return Trip::groupBy('source')
165-
->selectRaw('count(*) AS total, source')
166-
->get()
167-
->map(fn ($item) => [$item->total, [$item->source?->value]])
168-
->toArray();
172+
return Cache::remember('prom_trips_by_source', PROM_CACHE_TTL, function () {
173+
return Trip::groupBy('source')
174+
->selectRaw('count(*) AS total, source')
175+
->get()
176+
->map(fn ($item) => [$item->total, [$item->source?->value]])
177+
->toArray();
178+
});
169179
});
170180

171181
Prometheus::addGauge('Polylines count')
172182
->helpText('How many polylines are saved grouped by source?')
173183
->labels(['source'])
174184
->value(function () {
175-
return PolyLine::groupBy('source')
176-
->selectRaw('count(*) AS total, source')
177-
->get()
178-
->map(fn ($item) => [$item->total, [$item->source]])
179-
->toArray();
185+
return Cache::remember('prom_polylines_by_source', PROM_CACHE_TTL, function () {
186+
return PolyLine::groupBy('source')
187+
->selectRaw('count(*) AS total, source')
188+
->get()
189+
->map(fn ($item) => [$item->total, [$item->source]])
190+
->toArray();
191+
});
180192
});
181193

182194
Prometheus::addGauge('profile_image_count')
183195
->helpText('How many profile images are stored?')
184196
->value(function () {
185-
$iter = new \FilesystemIterator(public_path('uploads/avatars'));
197+
return Cache::remember('prom_profile_image_count', PROM_CACHE_TTL, function () {
198+
$iter = new \FilesystemIterator(public_path('uploads/avatars'));
186199

187-
return iterator_count($iter);
200+
return iterator_count($iter);
201+
});
188202
});
189203

190204
Prometheus::addGauge('active_statuses_count')
191205
->helpText('How many trips are en route?')
192206
->value(function () {
193-
return Trip::where('departure', '<', now())
194-
->where('arrival', '>', now())
195-
->count();
207+
return Cache::remember('prom_active_statuses_count', PROM_CACHE_TTL_SHORT, function () {
208+
return Trip::where('departure', '<', now())
209+
->where('arrival', '>', now())
210+
->count();
211+
});
196212
});
197-
198213
}
199214

200215
public function queueMetrics(): void
201216
{
202-
203217
Prometheus::addGauge('queue_size')
204218
->helpText('How many items are currently in the job queue?')
205219
->labels(['queue'])
@@ -214,19 +228,23 @@ public function queueMetrics(): void
214228
->helpText('How many jobs have failed?')
215229
->labels(['job_name', 'queue'])
216230
->value(function () {
217-
return $this->getJobsByDisplayName('failed_jobs');
231+
return Cache::remember('prom_failed_jobs_count', PROM_CACHE_TTL_SHORT, function () {
232+
return $this->getJobsByDisplayName('failed_jobs');
233+
});
218234
});
219235

220236
Prometheus::addGauge('completed_jobs_count')
221237
->helpText('How many jobs are done? Old items from queue monitor table are deleted after 7 days.')
222238
->labels(['job_name', 'status', 'queue'])
223239
->value(function () {
224-
return DB::table('queue_monitor')
225-
->groupBy('name', 'status', 'queue')
226-
->selectRaw('count(*) AS total, name, status, queue')
227-
->get()
228-
->map(fn ($item) => [$item->total, [$item->name, MonitorStatus::toNamedArray()[$item->status], $item->queue]])
229-
->toArray();
240+
return Cache::remember('prom_completed_jobs_count', PROM_CACHE_TTL_SHORT, function () {
241+
return DB::table('queue_monitor')
242+
->groupBy('name', 'status', 'queue')
243+
->selectRaw('count(*) AS total, name, status, queue')
244+
->get()
245+
->map(fn ($item) => [$item->total, [$item->name, MonitorStatus::toNamedArray()[$item->status], $item->queue]])
246+
->toArray();
247+
});
230248
});
231249
}
232250

@@ -286,47 +304,55 @@ public function oAuthMetrics(): void
286304
->helpText('How many total (revoked and accredited) access tokens do the clients have?')
287305
->labels(['app_name'])
288306
->value(function () {
289-
return DB::table('oauth_access_tokens')
290-
->join('oauth_clients', 'oauth_access_tokens.client_id', '=', 'oauth_clients.id')
291-
->groupBy('oauth_clients.name')
292-
->selectRaw('count(*) AS total, oauth_clients.name AS name')
293-
->orderBy('total', 'desc')
294-
->limit(20)
295-
->get()
296-
->map(fn ($item) => [$item->total, [$item->name]])
297-
->toArray();
307+
return Cache::remember('prom_oauth_total_tokens', PROM_CACHE_TTL, function () {
308+
return DB::table('oauth_access_tokens')
309+
->join('oauth_clients', 'oauth_access_tokens.client_id', '=', 'oauth_clients.id')
310+
->groupBy('oauth_clients.name')
311+
->selectRaw('count(*) AS total, oauth_clients.name AS name')
312+
->orderBy('total', 'desc')
313+
->limit(20)
314+
->get()
315+
->map(fn ($item) => [$item->total, [$item->name]])
316+
->toArray();
317+
});
298318
});
319+
299320
Prometheus::addGauge('oauth_users')
300321
->helpText('How many access tokens do the clients have?')
301322
->labels(['app_name'])
302323
->value(function () {
303-
return DB::table('oauth_access_tokens')
304-
->join('oauth_clients', 'oauth_access_tokens.client_id', '=', 'oauth_clients.id')
305-
->groupBy('oauth_clients.name')
306-
->selectRaw('count(distinct oauth_access_tokens.user_id) AS total, oauth_clients.name AS name')
307-
->where('oauth_access_tokens.revoked', '=', 0)
308-
->whereNull('oauth_access_tokens.expires_at')
309-
->orderBy('total', 'desc')
310-
->limit(20)
311-
->get()
312-
->map(fn ($item) => [$item->total, [$item->name]])
313-
->toArray();
324+
return Cache::remember('prom_oauth_users', PROM_CACHE_TTL, function () {
325+
return DB::table('oauth_access_tokens')
326+
->join('oauth_clients', 'oauth_access_tokens.client_id', '=', 'oauth_clients.id')
327+
->groupBy('oauth_clients.name')
328+
->selectRaw('count(distinct oauth_access_tokens.user_id) AS total, oauth_clients.name AS name')
329+
->where('oauth_access_tokens.revoked', '=', 0)
330+
->whereNull('oauth_access_tokens.expires_at')
331+
->orderBy('total', 'desc')
332+
->limit(20)
333+
->get()
334+
->map(fn ($item) => [$item->total, [$item->name]])
335+
->toArray();
336+
});
314337
});
338+
315339
Prometheus::addGauge('oauth_revoked_tokens')
316340
->helpText('How many revoked access tokens do the clients have?')
317341
->labels(['app_name'])
318342
->value(function () {
319-
return DB::table('oauth_access_tokens')
320-
->join('oauth_clients', 'oauth_access_tokens.client_id', '=', 'oauth_clients.id')
321-
->groupBy('oauth_clients.name')
322-
->selectRaw('count(distinct oauth_access_tokens.user_id) AS total, oauth_clients.name AS name')
323-
->where('oauth_access_tokens.revoked', '!=', 0)
324-
->whereNotNull('oauth_access_tokens.expires_at', 'or')
325-
->orderBy('total', 'desc')
326-
->limit(20)
327-
->get()
328-
->map(fn ($item) => [$item->total, [$item->name]])
329-
->toArray();
343+
return Cache::remember('prom_oauth_revoked_tokens', PROM_CACHE_TTL, function () {
344+
return DB::table('oauth_access_tokens')
345+
->join('oauth_clients', 'oauth_access_tokens.client_id', '=', 'oauth_clients.id')
346+
->groupBy('oauth_clients.name')
347+
->selectRaw('count(distinct oauth_access_tokens.user_id) AS total, oauth_clients.name AS name')
348+
->where('oauth_access_tokens.revoked', '!=', 0)
349+
->whereNotNull('oauth_access_tokens.expires_at', 'or')
350+
->orderBy('total', 'desc')
351+
->limit(20)
352+
->get()
353+
->map(fn ($item) => [$item->total, [$item->name]])
354+
->toArray();
355+
});
330356
});
331357
}
332358
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
use Illuminate\Database\Migrations\Migration;
6+
use Illuminate\Database\Schema\Blueprint;
7+
use Illuminate\Support\Facades\Schema;
8+
9+
return new class() extends Migration
10+
{
11+
public function up(): void
12+
{
13+
Schema::table('hafas_trips', function (Blueprint $table): void {
14+
// Fixes Trip::groupBy('source'). Previously: full table scan + tmp table + filesort on 5.5M rows
15+
$table->index('source', 'hafas_trips_source_index');
16+
17+
// Fixes active_statuses_count. Previously: full table scan on 5.5M rows
18+
// Column order: arrival comes first because arrival > NOW() matches only ~36 rows (very selective)
19+
$table->index(['arrival', 'departure'], 'hafas_trips_arrival_departure_index');
20+
});
21+
}
22+
23+
public function down(): void
24+
{
25+
Schema::table('hafas_trips', function (Blueprint $table): void {
26+
$table->dropIndex('hafas_trips_source_index');
27+
$table->dropIndex('hafas_trips_arrival_departure_index');
28+
});
29+
}
30+
};
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
use Illuminate\Database\Migrations\Migration;
6+
use Illuminate\Database\Schema\Blueprint;
7+
use Illuminate\Support\Facades\Schema;
8+
9+
return new class() extends Migration
10+
{
11+
public function up(): void
12+
{
13+
Schema::table('oauth_access_tokens', function (Blueprint $table): void {
14+
// Fixes three Prometheus OAuth queries. Previously: full table scan + tmp table + filesort on 587K rows
15+
$table->index('client_id', 'oauth_access_tokens_client_id_index');
16+
});
17+
}
18+
19+
public function down(): void
20+
{
21+
Schema::table('oauth_access_tokens', function (Blueprint $table): void {
22+
$table->dropIndex('oauth_access_tokens_client_id_index');
23+
});
24+
}
25+
};

tests/Unit/Providers/PrometheusServiceProviderTest.php

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,41 +4,50 @@
44

55
use App\Providers\PrometheusServiceProvider;
66
use Illuminate\Foundation\Testing\RefreshDatabase;
7-
use Illuminate\Support\Collection;
87
use Illuminate\Support\Facades\DB;
98

10-
use function PHPUnit\Framework\assertEquals;
9+
use function PHPUnit\Framework\assertEqualsCanonicalizing;
1110

1211
use Tests\ApiTestCase;
1312

1413
class PrometheusServiceProviderTest extends ApiTestCase
1514
{
1615
use RefreshDatabase;
1716

18-
const TABLENAME = 'jobs';
17+
private const TABLENAME = 'jobs';
1918

20-
public function test_get_jobs_by_display_name()
19+
private function insertJob(string $queue, string $displayName): void
2120
{
22-
// GIVEN
23-
DB::shouldReceive('table')
24-
->with(self::TABLENAME)
25-
->once()
26-
->andReturnSelf();
27-
28-
DB::shouldReceive('get')
29-
->with(['queue', 'payload'])
30-
->andReturn(
31-
Collection::make(
32-
array_merge([
33-
...array_fill(0, 4, (object) ['queue' => 'default', 'payload' => json_encode(['displayName' => 'JobA'])]),
34-
...array_fill(0, 7, (object) ['queue' => 'webhook', 'payload' => json_encode(['displayName' => 'JobB'])]),
35-
...array_fill(0, 2, (object) ['queue' => 'default', 'payload' => json_encode(['displayName' => 'JobC'])]),
36-
...array_fill(0, 5, (object) ['queue' => 'webhook', 'payload' => json_encode(['displayName' => 'JobC'])]),
37-
])));
21+
DB::table(self::TABLENAME)->insert([
22+
'queue' => $queue,
23+
'payload' => json_encode(['displayName' => $displayName]),
24+
'attempts' => 0,
25+
'available_at' => now()->timestamp,
26+
'created_at' => now()->timestamp,
27+
]);
28+
}
3829

30+
public function test_get_jobs_by_display_name(): void
31+
{
32+
// GIVEN: insert real rows so SQL JSON extraction can be tested end-to-end
33+
foreach (range(1, 4) as $_) {
34+
$this->insertJob('default', 'JobA');
35+
}
36+
foreach (range(1, 7) as $_) {
37+
$this->insertJob('webhook', 'JobB');
38+
}
39+
foreach (range(1, 2) as $_) {
40+
$this->insertJob('default', 'JobC');
41+
}
42+
foreach (range(1, 5) as $_) {
43+
$this->insertJob('webhook', 'JobC');
44+
}
45+
46+
// WHEN
3947
$actual = PrometheusServiceProvider::getJobsByDisplayName(self::TABLENAME);
4048

41-
assertEquals([
49+
// THEN: order is not guaranteed by GROUP BY, so use canonical comparison
50+
assertEqualsCanonicalizing([
4251
[4, ['JobA', 'default']],
4352
[7, ['JobB', 'webhook']],
4453
[2, ['JobC', 'default']],

0 commit comments

Comments
 (0)