Skip to content

Commit 3281ccf

Browse files
committed
mgr/dashboard: Replace capacity threshold data with prometheus metrics
- Fixes https://tracker.ceph.com/issues/72519 - the OSD dump metric is used in /api/osd/settings - this metric creates a performance bottleneck when there are thousands of OSDs - replacing it with similar Prometheus metrics - minor refactors, including renaming and comments. Signed-off-by: Afreen Misbah <[email protected]>
1 parent 33ab081 commit 3281ccf

File tree

5 files changed

+84
-32
lines changed

5 files changed

+84
-32
lines changed

src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,8 @@
229229
<ng-container class="ms-4 me-4"
230230
*ngIf="capacity">
231231
<cd-dashboard-pie [data]="{max: capacity.total_bytes, current: capacity.total_used_raw_bytes}"
232-
[lowThreshold]="osdSettings.nearfull_ratio"
233-
[highThreshold]="osdSettings.full_ratio">
232+
[lowThreshold]="capacityCardData.osdNearfull"
233+
[highThreshold]="capacityCardData.osdFull">
234234
</cd-dashboard-pie>
235235
</ng-container>
236236
</cd-card>

src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.ts

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@ import { Component, OnDestroy, OnInit } from '@angular/core';
22

33
import _ from 'lodash';
44
import { BehaviorSubject, EMPTY, Observable, Subject, Subscription, of } from 'rxjs';
5-
import { catchError, exhaustMap, switchMap, take, takeUntil } from 'rxjs/operators';
5+
import { catchError, exhaustMap, switchMap, takeUntil } from 'rxjs/operators';
66

77
import { HealthService } from '~/app/shared/api/health.service';
8-
import { OsdService } from '~/app/shared/api/osd.service';
9-
import { PrometheusService } from '~/app/shared/api/prometheus.service';
10-
import { Promqls as queries } from '~/app/shared/enum/dashboard-promqls.enum';
8+
import { PrometheusService, PromqlGuageMetric } from '~/app/shared/api/prometheus.service';
9+
import {
10+
CapacityCardQueries,
11+
UtilizationCardQueries
12+
} from '~/app/shared/enum/dashboard-promqls.enum';
1113
import { Icons } from '~/app/shared/enum/icons.enum';
1214
import { DashboardDetails } from '~/app/shared/models/cd-details';
1315
import { Permissions } from '~/app/shared/models/permissions';
@@ -26,7 +28,6 @@ import { MgrModuleService } from '~/app/shared/api/mgr-module.service';
2628
import { AlertClass } from '~/app/shared/enum/health-icon.enum';
2729
import { HardwareService } from '~/app/shared/api/hardware.service';
2830
import { SettingsService } from '~/app/shared/api/settings.service';
29-
import { OsdSettings } from '~/app/shared/models/osd-settings';
3031
import {
3132
IscsiMap,
3233
MdsMap,
@@ -36,15 +37,23 @@ import {
3637
PgStatus
3738
} from '~/app/shared/models/health.interface';
3839

40+
/**
 * Thresholds fed to the capacity card's pie chart.
 *
 * Both fields start out as `null` and are filled in once the gauge query
 * resolves; a field stays `null` when its metric is missing or is not a
 * finite number.
 */
type CapacityCardData = {
  osdNearfull: number | null;
  osdFull: number | null;
};
44+
3945
@Component({
4046
selector: 'cd-dashboard-v3',
4147
templateUrl: './dashboard-v3.component.html',
4248
styleUrls: ['./dashboard-v3.component.scss']
4349
})
4450
export class DashboardV3Component extends PrometheusListHelper implements OnInit, OnDestroy {
4551
detailsCardData: DashboardDetails = {};
46-
osdSettingsService: any;
47-
osdSettings = new OsdSettings();
52+
capacityCardData: CapacityCardData = {
53+
osdNearfull: null,
54+
osdFull: null
55+
};
56+
interval = new Subscription();
4857
permissions: Permissions;
4958
enabledFeature$: FeatureTogglesMap$;
5059
color: string;
@@ -102,7 +111,6 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit
102111
constructor(
103112
private summaryService: SummaryService,
104113
private orchestratorService: OrchestratorService,
105-
private osdService: OsdService,
106114
private authStorageService: AuthStorageService,
107115
private featureToggles: FeatureTogglesService,
108116
private healthService: HealthService,
@@ -121,7 +129,6 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit
121129
ngOnInit() {
122130
super.ngOnInit();
123131
if (this.permissions.configOpt.read) {
124-
this.getOsdSettings();
125132
this.isHardwareEnabled$ = this.getHardwareConfig();
126133
this.hardwareSummary$ = this.hardwareSubject.pipe(
127134
switchMap(() =>
@@ -148,6 +155,7 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit
148155
this.getPrometheusData(this.prometheusService.lastHourDateObject);
149156
this.getDetailsCardData();
150157
this.getTelemetryReport();
158+
this.getCapacityCardData();
151159
this.prometheusAlertService.getAlerts(true);
152160
}
153161

@@ -185,23 +193,38 @@ export class DashboardV3Component extends PrometheusListHelper implements OnInit
185193
);
186194
}
187195

188-
private getOsdSettings() {
189-
this.osdSettingsService = this.osdService
190-
.getOsdSettings()
191-
.pipe(take(1))
192-
.subscribe((data: OsdSettings) => {
193-
this.osdSettings = data;
194-
});
195-
}
196-
197196
public getPrometheusData(selectedTime: any) {
198-
this.queriesResults = this.prometheusService.getPrometheusQueriesData(
197+
this.queriesResults = this.prometheusService.getRangeQueriesData(
199198
selectedTime,
200-
queries,
199+
UtilizationCardQueries,
201200
this.queriesResults
202201
);
203202
}
204203

204+
/**
 * Extracts the OSD full / nearfull ratio samples from a gauge query result.
 *
 * Each series is located by its `__name__` label instead of by its position
 * in the result array, so the outcome does not depend on the order in which
 * the series are returned, nor on both series being present.
 *
 * @param data result array of the capacity gauge query; may be nullish or empty
 * @returns `[osdFull, osdNearfull]` as raw sample strings; an entry is `null`
 *   when no series with the matching name is present
 */
getCapacityQueryValues(data: PromqlGuageMetric['result']) {
  // Guard against a missing or malformed payload so the lookups below cannot throw.
  const seriesList = Array.isArray(data) ? data : [];
  const sampleValueOf = (metricName: string): string | null =>
    seriesList.find((series) => series?.metric?.['__name__'] === metricName)?.value?.[1] ?? null;

  const osdFull = sampleValueOf(CapacityCardQueries.OSD_FULL);
  const osdNearfull = sampleValueOf(CapacityCardQueries.OSD_NEARFULL);
  return [osdFull, osdNearfull];
}
216+
217+
/**
 * Loads the OSD full / nearfull ratios for the capacity card.
 * Issues one gauge query covering both metrics and writes the parsed values
 * onto `capacityCardData`.
 */
getCapacityCardData() {
218+
// Selector matching either metric by name, so both are fetched in a single query.
const CAPACITY_QUERY = `{__name__=~"${CapacityCardQueries.OSD_FULL}|${CapacityCardQueries.OSD_NEARFULL}"}`;
219+
this.prometheusService
220+
.getGaugeQueryData(CAPACITY_QUERY)
221+
.subscribe((data: PromqlGuageMetric) => {
222+
const [osdFull, osdNearfull] = this.getCapacityQueryValues(data?.result);
223+
// formatGuageMetric yields null for values that are not finite numbers.
this.capacityCardData.osdFull = this.prometheusService.formatGuageMetric(osdFull);
224+
this.capacityCardData.osdNearfull = this.prometheusService.formatGuageMetric(osdNearfull);
225+
});
226+
}
227+
205228
private getTelemetryReport() {
206229
this.healthService.getTelemetryStatus().subscribe((enabled: boolean) => {
207230
this.telemetryEnabled = enabled;

src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ export class RgwOverviewDashboardComponent implements OnInit, OnDestroy {
149149
}
150150

151151
getPrometheusData(selectedTime: any) {
152-
this.queriesResults = this.prometheusService.getPrometheusQueriesData(
152+
this.queriesResults = this.prometheusService.getRangeQueriesData(
153153
selectedTime,
154154
queries,
155155
this.queriesResults,

src/pybind/mgr/dashboard/frontend/src/app/shared/api/prometheus.service.ts

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { HttpClient } from '@angular/common/http';
22
import { Injectable } from '@angular/core';
33

4-
import { Observable, Subscription, forkJoin, timer } from 'rxjs';
5-
import { map, switchMap } from 'rxjs/operators';
4+
import { Observable, Subscription, forkJoin, of, timer } from 'rxjs';
5+
import { catchError, map, switchMap } from 'rxjs/operators';
66

77
import { AlertmanagerSilence } from '../models/alertmanager-silence';
88
import {
@@ -12,6 +12,16 @@ import {
1212
} from '../models/prometheus-alerts';
1313
import moment from 'moment';
1414

15+
/**
 * One series entry of a gauge query result: the series' metadata
 * (e.g. its `__name__`) together with a single [timestamp, value] sample.
 */
export type PromethuesGaugeMetricResult = {
16+
metric: Record<string, string>; // metric metadata
17+
value: [number, string]; // timestamp, value
18+
};
19+
20+
/**
 * Payload of a gauge query: a `vector` result holding one entry per
 * matching series.
 */
export type PromqlGuageMetric = {
21+
resultType: 'vector';
22+
result: PromethuesGaugeMetricResult[];
23+
};
24+
1525
@Injectable({
1626
providedIn: 'root'
1727
})
@@ -38,10 +48,16 @@ export class PrometheusService {
3848
}
3949
}
4050

51+
// Range Queries
4152
getPrometheusData(params: any): any {
4253
return this.http.get<any>(`${this.baseURL}/data`, { params });
4354
}
4455

56+
// Gauge Queries
57+
/**
 * Runs a single PromQL query against the `prometheus_query_data` endpoint.
 *
 * @param params query-string parameters; `params.params` carries the PromQL expression
 * @returns observable of the endpoint's response, typed as a gauge metric payload
 */
getPrometheusQueryData(params: { params: string }): Observable<PromqlGuageMetric> {
  // Type the request itself instead of `any`, so the declared return type is actually checked.
  return this.http.get<PromqlGuageMetric>(`${this.baseURL}/prometheus_query_data`, { params });
}
60+
4561
ifAlertmanagerConfigured(fn: (value?: string) => void, elseFn?: () => void): void {
4662
this.ifSettingConfigured(this.settingsKey.alertmanager, fn, elseFn);
4763
}
@@ -131,12 +147,20 @@ export class PrometheusService {
131147
return data.value || data.instance || '';
132148
}
133149

134-
getPrometheusQueriesData(
135-
selectedTime: any,
136-
queries: any,
137-
queriesResults: any,
138-
checkNan?: boolean
139-
) {
150+
/**
 * Fetches the result of a gauge query.
 *
 * Errors are not propagated: a failed request is replaced by an empty vector
 * result, so subscribers always receive a well-formed payload.
 *
 * @param query PromQL expression to evaluate
 * @returns observable of the query result, or an empty result on failure
 */
getGaugeQueryData(query: string): Observable<PromqlGuageMetric> {
  // Declared (not asserted) so the compiler verifies that the fallback is a complete payload.
  const emptyResult: PromqlGuageMetric = { resultType: 'vector', result: [] };
  return this.getPrometheusQueryData({ params: query }).pipe(
    catchError(() => of(emptyResult))
  );
}
156+
157+
/**
 * Converts a raw gauge sample into a number.
 *
 * @param data sample value as a numeric string; may be nullish when the
 *   series is absent
 * @returns the parsed number, or `null` when the sample is missing or does
 *   not parse to a finite number (e.g. "NaN", "+Inf", "-Inf")
 */
formatGuageMetric(data: string | null | undefined): number | null {
  const value = parseFloat(data ?? '');
  // Gauge values can be "NaN", "+Inf" or "-Inf" in case of errors.
  return Number.isFinite(value) ? value : null;
}
162+
163+
getRangeQueriesData(selectedTime: any, queries: any, queriesResults: any, checkNan?: boolean) {
140164
this.ifPrometheusConfigured(() => {
141165
if (this.timerGetPrometheusDataSub) {
142166
this.timerGetPrometheusDataSub.unsubscribe();

src/pybind/mgr/dashboard/frontend/src/app/shared/enum/dashboard-promqls.enum.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
export enum Promqls {
1+
export enum UtilizationCardQueries {
22
USEDCAPACITY = 'ceph_cluster_total_used_bytes',
33
WRITEIOPS = 'sum(rate(ceph_pool_wr[1m]))',
44
READIOPS = 'sum(rate(ceph_pool_rd[1m]))',
@@ -9,6 +9,11 @@ export enum Promqls {
99
RECOVERYBYTES = 'sum(rate(ceph_osd_recovery_bytes[1m]))'
1010
}
1111

12+
/**
 * Prometheus metric names used by the dashboard capacity card to obtain the
 * OSD nearfull and full ratio thresholds.
 */
export enum CapacityCardQueries {
13+
OSD_NEARFULL = 'ceph_osd_nearfull_ratio',
14+
OSD_FULL = 'ceph_osd_full_ratio'
15+
}
16+
1217
export enum RgwPromqls {
1318
RGW_REQUEST_PER_SECOND = 'sum(rate(ceph_rgw_req[1m]))',
1419
AVG_GET_LATENCY = '(sum(rate(ceph_rgw_op_get_obj_lat_sum[1m])) / sum(rate(ceph_rgw_op_get_obj_lat_count[1m]))) * 1000',

0 commit comments

Comments
 (0)