Skip to content

Commit 3d6a89a

Browse files
authored
NETOBSERV-1649: Improve UX and cases managed with prometheus (#549)
* NETOBSERV-1649: Improve UX and cases managed with prometheus
  - Automatically switch to PktDropBytes/Packets metrics when there's a drop cause/state filter
  - Make topology queries less strict on fetching drops for decoration: an error will no longer break the whole thing
  - Display a warning when that happens
  - Small refactoring related to the warning message and details
* Fix error reporting on missing labels
* Fix lint
1 parent 1a710ec commit 3d6a89a

File tree

14 files changed

+171
-138
lines changed

14 files changed

+171
-138
lines changed

pkg/model/fields/fields.go

Lines changed: 51 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,57 @@
11
package fields
22

33
const (
4-
Src = "Src"
5-
Dst = "Dst"
6-
Namespace = "K8S_Namespace"
7-
SrcNamespace = Src + Namespace
8-
DstNamespace = Dst + Namespace
9-
OwnerType = "K8S_OwnerType"
10-
SrcOwnerType = Src + OwnerType
11-
DstOwnerType = Dst + OwnerType
12-
OwnerName = "K8S_OwnerName"
13-
SrcOwnerName = Src + OwnerName
14-
DstOwnerName = Dst + OwnerName
15-
Type = "K8S_Type"
16-
SrcType = Src + Type
17-
DstType = Dst + Type
18-
Name = "K8S_Name"
19-
SrcName = Src + Name
20-
DstName = Dst + Name
21-
Addr = "Addr"
22-
SrcAddr = Src + Addr
23-
DstAddr = Dst + Addr
24-
Port = "Port"
25-
SrcPort = Src + Port
26-
DstPort = Dst + Port
27-
HostIP = "K8S_HostIP"
28-
SrcHostIP = Src + HostIP
29-
DstHostIP = Dst + HostIP
30-
HostName = "K8S_HostName"
31-
SrcHostName = Src + HostName
32-
DstHostName = Dst + HostName
33-
Zone = "K8S_Zone"
34-
SrcZone = Src + Zone
35-
DstZone = Dst + Zone
36-
Cluster = "K8S_ClusterName"
37-
Layer = "K8S_FlowLayer"
38-
Packets = "Packets"
39-
PktDropPackets = "PktDropPackets"
40-
Proto = "Proto"
41-
Bytes = "Bytes"
42-
DSCP = "Dscp"
43-
PktDropBytes = "PktDropBytes"
44-
FlowDirection = "FlowDirection"
45-
Interfaces = "Interfaces"
46-
IfDirections = "IfDirections"
47-
DNSID = "DnsId"
48-
DNSLatency = "DnsLatencyMs"
49-
DNSErrNo = "DnsErrno"
50-
DNSCode = "DnsFlagsResponseCode"
51-
Duplicate = "Duplicate"
52-
TimeFlowRTT = "TimeFlowRttNs"
4+
Src = "Src"
5+
Dst = "Dst"
6+
Namespace = "K8S_Namespace"
7+
SrcNamespace = Src + Namespace
8+
DstNamespace = Dst + Namespace
9+
OwnerType = "K8S_OwnerType"
10+
SrcOwnerType = Src + OwnerType
11+
DstOwnerType = Dst + OwnerType
12+
OwnerName = "K8S_OwnerName"
13+
SrcOwnerName = Src + OwnerName
14+
DstOwnerName = Dst + OwnerName
15+
Type = "K8S_Type"
16+
SrcType = Src + Type
17+
DstType = Dst + Type
18+
Name = "K8S_Name"
19+
SrcName = Src + Name
20+
DstName = Dst + Name
21+
Addr = "Addr"
22+
SrcAddr = Src + Addr
23+
DstAddr = Dst + Addr
24+
Port = "Port"
25+
SrcPort = Src + Port
26+
DstPort = Dst + Port
27+
HostIP = "K8S_HostIP"
28+
SrcHostIP = Src + HostIP
29+
DstHostIP = Dst + HostIP
30+
HostName = "K8S_HostName"
31+
SrcHostName = Src + HostName
32+
DstHostName = Dst + HostName
33+
Zone = "K8S_Zone"
34+
SrcZone = Src + Zone
35+
DstZone = Dst + Zone
36+
Cluster = "K8S_ClusterName"
37+
Layer = "K8S_FlowLayer"
38+
Packets = "Packets"
39+
Proto = "Proto"
40+
Bytes = "Bytes"
41+
DSCP = "Dscp"
42+
PktDropPackets = "PktDropPackets"
43+
PktDropBytes = "PktDropBytes"
44+
PktDropLatestState = "PktDropLatestState"
45+
PktDropLatestDropCause = "PktDropLatestDropCause"
46+
FlowDirection = "FlowDirection"
47+
Interfaces = "Interfaces"
48+
IfDirections = "IfDirections"
49+
DNSID = "DnsId"
50+
DNSLatency = "DnsLatencyMs"
51+
DNSErrNo = "DnsErrno"
52+
DNSCode = "DnsFlagsResponseCode"
53+
Duplicate = "Duplicate"
54+
TimeFlowRTT = "TimeFlowRttNs"
5355
)
5456

5557
func IsNumeric(v string) bool {

pkg/prometheus/inventory.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,16 @@ func (i *Inventory) Search(neededLabels []string, valueField string) SearchResul
7676

7777
func (i *Inventory) searchWithDir(neededLabels []string, valueField string, dir config.FlowDirection) SearchResult {
7878
sr := SearchResult{}
79+
// Special case: when the query filters on PktDropLatestState or PktDropLatestDropCause and the value field is Bytes/Packets,
80+
// the value field must actually be treated as PktDropBytes/PktDropPackets
81+
if slices.Contains(neededLabels, fields.PktDropLatestDropCause) || slices.Contains(neededLabels, fields.PktDropLatestState) {
82+
switch valueField {
83+
case fields.Bytes:
84+
valueField = fields.PktDropBytes
85+
case fields.Packets:
86+
valueField = fields.PktDropPackets
87+
}
88+
}
7989
for _, m := range i.metrics {
8090
match, missingLabels := checkMatch(&m, neededLabels, valueField, dir)
8191
if match {
@@ -84,12 +94,12 @@ func (i *Inventory) searchWithDir(neededLabels []string, valueField string, dir
8494
return sr
8595
}
8696
sr.Candidates = append(sr.Candidates, m.Name)
87-
} else if m.Enabled && (sr.MissingLabels == nil || len(missingLabels) < len(sr.MissingLabels)) {
97+
} else if m.Enabled && len(missingLabels) > 0 && (sr.MissingLabels == nil || len(missingLabels) < len(sr.MissingLabels)) {
8898
// Keep the smallest possible set of missing labels
8999
sr.MissingLabels = missingLabels
90100
}
91101
}
92-
log.Debugf("No metric match for %v / %s (/ %s)", neededLabels, valueField, dir)
102+
log.Debugf("No metric match for %v / %s / %s", neededLabels, valueField, dir)
93103
return sr
94104
}
95105

pkg/prometheus/inventory_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,9 @@ func TestInventory_Search_RTT_Candidate(t *testing.T) {
9898
search := inv.Search([]string{"SrcK8S_Namespace", "DstK8S_Namespace", "K8S_FlowLayer", "DstK8S_Type", "SrcK8S_Type"}, "TimeFlowRttNs")
9999
assert.Equal(t, []string{"netobserv_workload_rtt_seconds"}, search.Candidates)
100100
}
101+
102+
func TestInventory_Search_MissingLabels(t *testing.T) {
103+
inv := NewInventory(&config.Prometheus{Metrics: configuredMetrics})
104+
search := inv.Search([]string{"SrcK8S_Namespace", "DstK8S_Namespace", "SrcK8S_HostName"}, "Bytes")
105+
assert.Equal(t, []string{"SrcK8S_HostName"}, search.MissingLabels)
106+
}

web/locales/en/plugin__netobserv-plugin.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,8 @@
168168
"Configuration limits": "Configuration limits",
169169
"Metrics": "Metrics",
170170
"You may consider the following changes to avoid this error:": "You may consider the following changes to avoid this error:",
171-
"Add missing metrics to prometheus using FlowMetric API": "Add missing metrics to prometheus using FlowMetric API",
172-
"Enable Loki in FlowCollector API": "Enable Loki in FlowCollector API",
171+
"Add missing metrics to prometheus in the FlowCollector API (processor.metrics.includeList)": "Add missing metrics to prometheus in the FlowCollector API (processor.metrics.includeList)",
172+
"Enable Loki in the FlowCollector API (loki.enable)": "Enable Loki in the FlowCollector API (loki.enable)",
173173
"Reduce the Query Options -> limit to reduce the number of results": "Reduce the Query Options -> limit to reduce the number of results",
174174
"Increase Loki \"max_entries_limit_per_query\" entry in configuration file": "Increase Loki \"max_entries_limit_per_query\" entry in configuration file",
175175
"Add Namespace, Owner or Resource filters (which use indexed fields) to improve the query performance": "Add Namespace, Owner or Resource filters (which use indexed fields) to improve the query performance",
@@ -318,6 +318,7 @@
318318
"When in \"Match any\" mode, try using only Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance": "When in \"Match any\" mode, try using only Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance",
319319
"Add Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance": "Add Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance",
320320
"Add more filters or decrease limit / range to improve the query performance": "Add more filters or decrease limit / range to improve the query performance",
321+
"Could not fetch drop information": "Could not fetch drop information",
321322
"Overview": "Overview",
322323
"Traffic flows": "Traffic flows",
323324
"Topology": "Topology",

web/src/components/messages/error.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,11 @@ export const Error: React.FC<ErrorProps> = ({ title, error, isLokiRelated }) =>
132132
{error.includes('promUnsupported') && (
133133
<>
134134
<Text component={TextVariants.blockquote}>
135-
{t('Add missing metrics to prometheus using FlowMetric API')}
135+
{t('Add missing metrics to prometheus in the FlowCollector API (processor.metrics.includeList)')}
136+
</Text>
137+
<Text component={TextVariants.blockquote}>
138+
{t('Enable Loki in the FlowCollector API (loki.enable)')}
136139
</Text>
137-
<Text component={TextVariants.blockquote}>{t('Enable Loki in FlowCollector API')}</Text>
138140
</>
139141
)}
140142
{error.includes('max entries limit') && (

web/src/components/netflow-traffic.tsx

Lines changed: 56 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,14 @@ import {
7676
TopologyGroupTypes,
7777
TopologyOptions
7878
} from '../model/topology';
79+
import { Warning } from '../model/warnings';
7980
import { getFetchFunctions as getBackAndForthFetch } from '../utils/back-and-forth';
8081
import { Column, ColumnsId, ColumnSizeMap, getDefaultColumns } from '../utils/columns';
8182
import { loadConfig } from '../utils/config';
8283
import { ContextSingleton } from '../utils/context';
8384
import { computeStepInterval, getTimeRangeOptions, TimeRange } from '../utils/datetime';
8485
import { formatDuration, getDateMsInSeconds, getDateSInMiliseconds, parseDuration } from '../utils/duration';
85-
import { getHTTPErrorDetails, isPromUnsupportedError } from '../utils/errors';
86+
import { getHTTPErrorDetails, getPromUnsupportedError, isPromUnsupportedError } from '../utils/errors';
8687
import { exportToPng } from '../utils/export';
8788
import { checkFilterAvailable, getFilterDefinitions } from '../utils/filter-definitions';
8889
import { mergeFlowReporters } from '../utils/flows';
@@ -199,7 +200,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
199200
}
200201

201202
const [config, setConfig] = React.useState<Config>(defaultConfig);
202-
const [warningMessage, setWarningMessage] = React.useState<string | undefined>();
203+
const [warning, setWarning] = React.useState<Warning | undefined>();
203204
const [showViewOptions, setShowViewOptions] = useLocalStorage<boolean>(localStorageShowOptionsKey, false);
204205
const [showHistogram, setShowHistogram] = useLocalStorage<boolean>(localStorageShowHistogramKey, false);
205206
const [isViewOptionOverflowMenuOpen, setViewOptionOverflowMenuOpen] = React.useState(false);
@@ -472,7 +473,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
472473
setFilters(f);
473474
setFlows([]);
474475
setMetrics(defaultNetflowMetrics);
475-
setWarningMessage(undefined);
476+
setWarning(undefined);
476477
},
477478
[setFilters, setFlows]
478479
);
@@ -611,12 +612,12 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
611612
(query: Promise<unknown>) => {
612613
setLastRefresh(undefined);
613614
setLastDuration(undefined);
614-
setWarningMessage(undefined);
615+
setWarning(undefined);
615616
Promise.race([query, new Promise((resolve, reject) => setTimeout(reject, 4000, 'slow'))]).then(
616617
null,
617618
(reason: string) => {
618619
if (reason === 'slow') {
619-
setWarningMessage(`${t('Query is slow')}`);
620+
setWarning({ type: 'slow', summary: `${t('Query is slow')}` });
620621
}
621622
}
622623
);
@@ -626,22 +627,30 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
626627
[]
627628
);
628629

629-
const slownessReason = React.useCallback((): string => {
630-
if (match === 'any' && hasNonIndexFields(filters.list)) {
631-
return t(
632-
// eslint-disable-next-line max-len
633-
'When in "Match any" mode, try using only Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance'
634-
);
635-
}
636-
if (match === 'all' && !hasIndexFields(filters.list)) {
637-
return t(
638-
// eslint-disable-next-line max-len
639-
'Add Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance'
640-
);
641-
}
642-
return t('Add more filters or decrease limit / range to improve the query performance');
630+
const checkSlownessReason = React.useCallback(
631+
(w: Warning | undefined): Warning | undefined => {
632+
if (w?.type == 'slow') {
633+
let reason = '';
634+
if (match === 'any' && hasNonIndexFields(filters.list)) {
635+
reason = t(
636+
// eslint-disable-next-line max-len
637+
'When in "Match any" mode, try using only Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance'
638+
);
639+
} else if (match === 'all' && !hasIndexFields(filters.list)) {
640+
reason = t(
641+
// eslint-disable-next-line max-len
642+
'Add Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance'
643+
);
644+
} else {
645+
reason = t('Add more filters or decrease limit / range to improve the query performance');
646+
}
647+
return { ...w, details: reason };
648+
}
649+
return w;
650+
},
643651
// eslint-disable-next-line react-hooks/exhaustive-deps
644-
}, [match, filters]);
652+
[match, filters]
653+
);
645654

646655
const fetchTable = React.useCallback(
647656
(fq: FlowQuery) => {
@@ -1005,21 +1014,34 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
10051014

10061015
if (droppedType) {
10071016
promises.push(
1008-
getMetrics({ ...fq, type: droppedType }, range).then(res => {
1009-
const droppedRateMetrics = {} as RateMetrics;
1010-
droppedRateMetrics[getRateMetricKey(topologyMetricType)] = res.metrics;
1011-
currentMetrics = { ...currentMetrics, droppedRateMetrics };
1012-
setMetrics(currentMetrics);
1013-
return res.stats;
1014-
})
1017+
getMetrics({ ...fq, type: droppedType }, range)
1018+
.then(res => {
1019+
const droppedRateMetrics = {} as RateMetrics;
1020+
droppedRateMetrics[getRateMetricKey(topologyMetricType)] = res.metrics;
1021+
currentMetrics = { ...currentMetrics, droppedRateMetrics };
1022+
setMetrics(currentMetrics);
1023+
return res.stats;
1024+
})
1025+
.catch(err => {
1026+
// An error might occur, for instance, when fetching node-based topology with the drop feature enabled and Loki disabled.
1027+
// We don't want to break the whole topology because of missing drops enrichment
1028+
let strErr = getHTTPErrorDetails(err, true);
1029+
if (isPromUnsupportedError(strErr)) {
1030+
strErr = getPromUnsupportedError(strErr);
1031+
}
1032+
setWarning({ type: 'cantfetchdrops', summary: t('Could not fetch drop information'), details: strErr });
1033+
return { numQueries: 0, dataSources: [], limitReached: false };
1034+
})
10151035
);
10161036
} else if (!['PktDropBytes', 'PktDropPackets'].includes(topologyMetricType)) {
10171037
currentMetrics = { ...currentMetrics, droppedRateMetrics: undefined };
10181038
setMetrics(currentMetrics);
10191039
}
10201040
return Promise.all(promises);
10211041
},
1022-
[config.features, getFetchFunctions, topologyMetricType, topologyMetricFunction, range]
1042+
// "t" dependency kills jest
1043+
// eslint-disable-next-line react-hooks/exhaustive-deps
1044+
[config.features, getFetchFunctions, topologyMetricType, topologyMetricFunction, range, setWarning]
10231045
);
10241046

10251047
const tick = React.useCallback(() => {
@@ -1076,7 +1098,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
10761098
setFlows([]);
10771099
setMetrics(defaultNetflowMetrics);
10781100
setError(getHTTPErrorDetails(err, true));
1079-
setWarningMessage(undefined);
1101+
setWarning(undefined);
10801102
})
10811103
.finally(() => {
10821104
const endDate = new Date();
@@ -1536,8 +1558,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
15361558
limit={limit}
15371559
lastRefresh={lastRefresh}
15381560
lastDuration={lastDuration}
1539-
warningMessage={warningMessage}
1540-
slownessReason={slownessReason()}
1561+
warning={checkSlownessReason(warning)}
15411562
range={range}
15421563
showDNSLatency={isDNSTracking()}
15431564
showRTTLatency={isFlowRTT()}
@@ -1685,8 +1706,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
16851706
loading={loading}
16861707
lastRefresh={lastRefresh}
16871708
lastDuration={lastDuration}
1688-
warningMessage={warningMessage}
1689-
slownessReason={slownessReason()}
1709+
warning={checkSlownessReason(warning)}
16901710
isShowQuerySummary={isShowQuerySummary}
16911711
toggleQuerySummary={() => onToggleQuerySummary(!isShowQuerySummary)}
16921712
isDark={isDarkTheme}
@@ -1698,8 +1718,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
16981718
loading={loading}
16991719
lastRefresh={lastRefresh}
17001720
lastDuration={lastDuration}
1701-
warningMessage={warningMessage}
1702-
slownessReason={slownessReason()}
1721+
warning={checkSlownessReason(warning)}
17031722
range={range}
17041723
type={recordType}
17051724
isShowQuerySummary={isShowQuerySummary}
@@ -1749,13 +1768,13 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
17491768
setOverviewFocus,
17501769
setTopologyOptions,
17511770
size,
1752-
slownessReason,
1771+
checkSlownessReason,
17531772
stats,
17541773
t,
17551774
topologyMetricFunction,
17561775
topologyMetricType,
17571776
topologyOptions,
1758-
warningMessage
1777+
warning
17591778
]);
17601779

17611780
//update data on filters changes

0 commit comments

Comments
 (0)