Skip to content

Commit 4daf4d8

Browse files
committed
Add recovery_status to lustrefs_exporter and enhance the recovery status parser to include additional metrics:
- RecoveryDuration
- RecoveryTimeRemaining
- RecoveryTotalClients
1 parent d39f692 commit 4daf4d8

23 files changed

+740
-75
lines changed
Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
obdfilter.200NVX2-OST0000.recovery_status=
2+
status: COMPLETE
3+
recovery_start: 1761567697
4+
recovery_duration: 1
5+
completed_clients: 8/8
6+
replayed_requests: 0
7+
last_transno: 17184814233
8+
VBR: DISABLED
9+
IR: ENABLED
10+
obdfilter.200NVX2-OST0003.recovery_status=
11+
status: COMPLETE
12+
recovery_start: 1759494115
13+
recovery_duration: 15
14+
completed_clients: 8/8
15+
replayed_requests: 0
16+
last_transno: 12934942643
17+
VBR: DISABLED
18+
IR: DISABLED
19+
obdfilter.200NVX2-OST0004.recovery_status=
20+
status: COMPLETE
21+
recovery_start: 1759494115
22+
recovery_duration: 14
23+
completed_clients: 8/8
24+
replayed_requests: 0
25+
last_transno: 12934956643
26+
VBR: DISABLED
27+
IR: DISABLED
28+
obdfilter.200NVX2-OST0007.recovery_status=
29+
status: COMPLETE
30+
recovery_start: 1759494115
31+
recovery_duration: 14
32+
completed_clients: 8/8
33+
replayed_requests: 0
34+
last_transno: 12934943652
35+
VBR: DISABLED
36+
IR: DISABLED

lustre-collector/src/parser.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ use crate::{
66
ldlm, llite, mdd_parser,
77
mds::{self, client_count_parser},
88
mgs::mgs_parser,
9-
nodemap, osd_parser, oss, quota, top_level_parser,
9+
nodemap, osd_parser, oss, quota, recovery_status_parser, top_level_parser,
1010
types::Record,
1111
};
1212
use combine::{Parser, Stream, choice, error::ParseError, many};
@@ -24,6 +24,7 @@ pub fn params() -> Vec<String> {
2424
.chain(mdd_parser::params())
2525
.chain(quota::params())
2626
.chain(nodemap::params())
27+
.chain(recovery_status_parser::params())
2728
.collect()
2829
}
2930

@@ -44,6 +45,7 @@ where
4445
mdd_parser::parse().map(|x| vec![x]),
4546
quota::parse().map(|x| vec![x]),
4647
nodemap::parse().map(|x| vec![x]),
48+
recovery_status_parser::parse().map(|x| x),
4749
)))
4850
.map(|xs: Vec<_>| xs.into_iter().flatten().collect())
4951
}
@@ -86,6 +88,7 @@ mod tests {
8688
test_fixtures!(test_lustre_2_14_0_ddn145_fixtures, "*ddn145*");
8789

8890
test_fixtures!(test_lustre_2_14_0_ddn133_fixtures, "*ddn133*");
91+
test_fixtures!(test_lustre_2_14_0_ddn212_fixtures, "*ddn212*");
8992

9093
#[test]
9194
fn test_params() {

lustre-collector/src/recovery_status_parser.rs

Lines changed: 97 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ where
7171
})
7272
}
7373

74-
fn clients_line<I>(x: &'static str) -> impl Parser<I, Output = u64>
74+
fn clients_line<I>(x: &'static str) -> impl Parser<I, Output = (u64, Option<u64>)>
7575
where
7676
I: Stream<Token = char>,
7777
I::Error: ParseError<I::Token, I::Range, I::Position>,
@@ -83,15 +83,18 @@ where
8383
optional((token('/'), digits())),
8484
optional(newline().map(drop).or(eof())),
8585
)
86-
.map(|(_, _, x, _, _): (_, _, u64, _, _)| x)
86+
.map(|(_, _, x, y, _): (_, _, u64, Option<(_, u64)>, _)| (x, y.map(|(_, v)| v)))
8787
}
8888

8989
#[derive(Debug)]
9090
enum RecoveryStat {
91-
Status(RecoveryStatus),
9291
Completed(u64),
9392
Connected(u64),
9493
Evicted(u64),
94+
RecoveryDuration(u64),
95+
Status(RecoveryStatus),
96+
TimeRemaining(u64),
97+
Total(Option<u64>),
9598
}
9699

97100
pub struct StatName(pub String);
@@ -105,6 +108,19 @@ where
105108
many1(alpha_num().or(one_of("_-".chars()))).map(StatName)
106109
}
107110

111+
fn simple_client_stat<I>(
112+
name: &'static str,
113+
constructor: fn(u64) -> RecoveryStat,
114+
) -> impl Parser<I, Output = Vec<RecoveryStat>>
115+
where
116+
I: Stream<Token = char>,
117+
I::Error: ParseError<I::Token, I::Range, I::Position>,
118+
{
119+
clients_line(name)
120+
.skip(optional(newline()))
121+
.map(move |(x, _)| vec![constructor(x)])
122+
}
123+
108124
fn target_recovery_stats<I>() -> impl Parser<I, Output = Vec<RecoveryStat>>
109125
where
110126
I: Stream<Token = char>,
@@ -113,22 +129,22 @@ where
113129
many(choice((
114130
status_line()
115131
.skip(optional(newline()))
116-
.map(RecoveryStat::Status)
117-
.map(Some),
118-
clients_line("completed_clients")
119-
.skip(optional(newline()))
120-
.map(RecoveryStat::Completed)
121-
.map(Some),
132+
.map(|x| vec![RecoveryStat::Status(x)]),
133+
simple_client_stat("recovery_duration", RecoveryStat::RecoveryDuration),
134+
simple_client_stat("completed_clients", RecoveryStat::Completed),
135+
simple_client_stat("time_remaining", RecoveryStat::TimeRemaining),
136+
simple_client_stat("evicted_clients", RecoveryStat::Evicted),
122137
clients_line("connected_clients")
123138
.skip(optional(newline()))
124-
.map(RecoveryStat::Connected)
125-
.map(Some),
126-
clients_line("evicted_clients")
127-
.skip(optional(newline()))
128-
.map(RecoveryStat::Evicted)
129-
.map(Some),
139+
.map(|(x, y)| {
140+
let mut stats = vec![RecoveryStat::Connected(x)];
141+
if let Some(total) = y {
142+
stats.push(RecoveryStat::Total(Some(total)));
143+
}
144+
stats
145+
}),
130146
// This will ignore any line/field we don't care about
131-
attempt((stat_name(), token(':'), till_newline().skip(newline()))).map(|_| None),
147+
attempt((stat_name(), token(':'), till_newline().skip(newline()))).map(|_| vec![]),
132148
)))
133149
.map(|xs: Vec<_>| xs.into_iter().flatten().collect())
134150
}
@@ -176,6 +192,28 @@ where
176192
value: *value,
177193
})
178194
}
195+
RecoveryStat::RecoveryDuration(value) => {
196+
TargetStats::RecoveryDuration(TargetStat {
197+
kind,
198+
param: param.clone(),
199+
target: target.clone(),
200+
value: *value,
201+
})
202+
}
203+
RecoveryStat::TimeRemaining(value) => {
204+
TargetStats::RecoveryTimeRemaining(TargetStat {
205+
kind,
206+
param: param.clone(),
207+
target: target.clone(),
208+
value: *value,
209+
})
210+
}
211+
RecoveryStat::Total(value) => TargetStats::RecoveryTotalClients(TargetStat {
212+
kind,
213+
param: param.clone(),
214+
target: target.clone(),
215+
value: value.unwrap_or(0),
216+
}),
179217
})
180218
.collect()
181219
})
@@ -186,19 +224,17 @@ where
186224
I: Stream<Token = char>,
187225
I::Error: ParseError<I::Token, I::Range, I::Position>,
188226
{
189-
many(
190-
(
191-
target_status(),
192-
skip_until(attempt(ost_or_mdt().map(drop)).or(eof())),
193-
)
194-
.map(|(x, _)| x.into_iter().map(Record::Target).collect()),
227+
(
228+
target_status(),
229+
skip_until(attempt(ost_or_mdt().map(drop)).or(eof())),
195230
)
196-
.map(|x: Vec<Vec<Record>>| x.into_iter().flatten().collect())
231+
.map(|(x, _)| x.into_iter().map(Record::Target).collect())
197232
}
198233

199234
#[cfg(test)]
200235
mod tests {
201-
use crate::recovery_status_parser::{clients_line, parse, target_recovery_stats};
236+
use crate::parser::parse;
237+
use crate::recovery_status_parser::{clients_line, target_recovery_stats};
202238
use combine::{Parser, parser::EasyParser, stream::position};
203239

204240
#[test]
@@ -238,11 +274,11 @@ mod tests {
238274
#[test]
239275
fn test_clients_line() {
240276
let result = clients_line("completed_clients").parse("completed_clients: 3/7\n");
241-
assert_eq!(result, Ok((3, "")));
277+
assert_eq!(result, Ok(((3, Some(7)), "")));
242278
let result = clients_line("connected_clients").parse("connected_clients: 3/7\n");
243-
assert_eq!(result, Ok((3, "")));
279+
assert_eq!(result, Ok(((3, Some(7)), "")));
244280
let result = clients_line("completed_clients").parse("completed_clients: 3\n");
245-
assert_eq!(result, Ok((3, "")));
281+
assert_eq!(result, Ok(((3, None), "")));
246282
}
247283

248284
#[test]
@@ -259,7 +295,19 @@ IR: ENABLED
259295

260296
let (records, _): (Vec<_>, _) = target_recovery_stats().parse(x).unwrap();
261297

262-
insta::assert_debug_snapshot!(records);
298+
insta::assert_debug_snapshot!(records, @r"
299+
[
300+
Status(
301+
Complete,
302+
),
303+
RecoveryDuration(
304+
150,
305+
),
306+
Completed(
307+
4,
308+
),
309+
]
310+
");
263311
}
264312

265313
#[test]
@@ -275,6 +323,26 @@ completed_clients: 3
275323

276324
let (records, _): (Vec<_>, _) = target_recovery_stats().parse(x).unwrap();
277325

278-
insta::assert_debug_snapshot!(records);
326+
insta::assert_debug_snapshot!(records, @r"
327+
[
328+
Status(
329+
Recovering,
330+
),
331+
TimeRemaining(
332+
119,
333+
),
334+
Connected(
335+
3,
336+
),
337+
Total(
338+
Some(
339+
7,
340+
),
341+
),
342+
Completed(
343+
3,
344+
),
345+
]
346+
");
279347
}
280348
}

lustre-collector/src/snapshots/lustre_collector__parser__tests__params.snap

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,4 +67,6 @@ expression: params()
6767
"qmt.*.*.glb-grp",
6868
"nodemap.*.dt_stats",
6969
"nodemap.*.md_stats",
70+
"obdfilter.*OST*.recovery_status",
71+
"mdt.*MDT*.recovery_status",
7072
]

lustre-collector/src/snapshots/lustre_collector__parser__tests__valid_fixture_lustre-2.14.0_ddn145__2.14.0_ddn145_recovery.txt.snap

Lines changed: 73 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,77 @@ source: lustre-collector/src/parser.rs
33
expression: result
44
---
55
(
6-
[],
7-
"obdfilter.fs-OST0000.recovery_status=status: INACTIVE\nobdfilter.fs-OST0001.recovery_status=status: INACTIVE\nmdt.fs-MDT0000.recovery_status=\nstatus: COMPLETE\nrecovery_start: 1715712434\nrecovery_duration: 0\ncompleted_clients: 1/1\nreplayed_requests: 0\nlast_transno: 4294967298\nVBR: DISABLED\nIR: DISABLED\n",
6+
[
7+
Target(
8+
RecoveryStatus(
9+
TargetStat {
10+
kind: Ost,
11+
param: Param(
12+
"recovery_status",
13+
),
14+
target: Target(
15+
"fs-OST0000",
16+
),
17+
value: Inactive,
18+
},
19+
),
20+
),
21+
Target(
22+
RecoveryStatus(
23+
TargetStat {
24+
kind: Ost,
25+
param: Param(
26+
"recovery_status",
27+
),
28+
target: Target(
29+
"fs-OST0001",
30+
),
31+
value: Inactive,
32+
},
33+
),
34+
),
35+
Target(
36+
RecoveryStatus(
37+
TargetStat {
38+
kind: Mdt,
39+
param: Param(
40+
"recovery_status",
41+
),
42+
target: Target(
43+
"fs-MDT0000",
44+
),
45+
value: Complete,
46+
},
47+
),
48+
),
49+
Target(
50+
RecoveryDuration(
51+
TargetStat {
52+
kind: Mdt,
53+
param: Param(
54+
"recovery_status",
55+
),
56+
target: Target(
57+
"fs-MDT0000",
58+
),
59+
value: 0,
60+
},
61+
),
62+
),
63+
Target(
64+
RecoveryCompletedClients(
65+
TargetStat {
66+
kind: Mdt,
67+
param: Param(
68+
"recovery_status",
69+
),
70+
target: Target(
71+
"fs-MDT0000",
72+
),
73+
value: 1,
74+
},
75+
),
76+
),
77+
],
78+
"",
879
)

0 commit comments

Comments
 (0)