Skip to content

Commit 9334a78

Browse files
committed
Enhance recovery status parser to include additional metrics
RecoveryCompletedClients RecoveryDuration RecoveryTimeRemaining RecoveryTotalClients
1 parent d39f692 commit 9334a78

7 files changed

+267
-53
lines changed

lustre-collector/src/recovery_status_parser.rs

Lines changed: 91 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ where
7171
})
7272
}
7373

74-
fn clients_line<I>(x: &'static str) -> impl Parser<I, Output = u64>
74+
fn clients_line<I>(x: &'static str) -> impl Parser<I, Output = (u64, Option<u64>)>
7575
where
7676
I: Stream<Token = char>,
7777
I::Error: ParseError<I::Token, I::Range, I::Position>,
@@ -83,15 +83,18 @@ where
8383
optional((token('/'), digits())),
8484
optional(newline().map(drop).or(eof())),
8585
)
86-
.map(|(_, _, x, _, _): (_, _, u64, _, _)| x)
86+
.map(|(_, _, x, y, _): (_, _, u64, Option<(_, u64)>, _)| (x, y.map(|(_, v)| v)))
8787
}
8888

8989
#[derive(Debug)]
9090
enum RecoveryStat {
91-
Status(RecoveryStatus),
9291
Completed(u64),
9392
Connected(u64),
9493
Evicted(u64),
94+
RecoveryDuration(u64),
95+
Status(RecoveryStatus),
96+
TimeRemaining(u64),
97+
Total(Option<u64>),
9598
}
9699

97100
pub struct StatName(pub String);
@@ -105,6 +108,19 @@ where
105108
many1(alpha_num().or(one_of("_-".chars()))).map(StatName)
106109
}
107110

111+
fn simple_client_stat<I>(
112+
name: &'static str,
113+
constructor: fn(u64) -> RecoveryStat,
114+
) -> impl Parser<I, Output = Vec<RecoveryStat>>
115+
where
116+
I: Stream<Token = char>,
117+
I::Error: ParseError<I::Token, I::Range, I::Position>,
118+
{
119+
clients_line(name)
120+
.skip(optional(newline()))
121+
.map(move |(x, _)| vec![constructor(x)])
122+
}
123+
108124
fn target_recovery_stats<I>() -> impl Parser<I, Output = Vec<RecoveryStat>>
109125
where
110126
I: Stream<Token = char>,
@@ -113,22 +129,22 @@ where
113129
many(choice((
114130
status_line()
115131
.skip(optional(newline()))
116-
.map(RecoveryStat::Status)
117-
.map(Some),
118-
clients_line("completed_clients")
119-
.skip(optional(newline()))
120-
.map(RecoveryStat::Completed)
121-
.map(Some),
132+
.map(|x| vec![RecoveryStat::Status(x)]),
133+
simple_client_stat("recovery_duration", RecoveryStat::RecoveryDuration),
134+
simple_client_stat("completed_clients", RecoveryStat::Completed),
135+
simple_client_stat("time_remaining", RecoveryStat::TimeRemaining),
136+
simple_client_stat("evicted_clients", RecoveryStat::Evicted),
122137
clients_line("connected_clients")
123138
.skip(optional(newline()))
124-
.map(RecoveryStat::Connected)
125-
.map(Some),
126-
clients_line("evicted_clients")
127-
.skip(optional(newline()))
128-
.map(RecoveryStat::Evicted)
129-
.map(Some),
139+
.map(|(x, y)| {
140+
let mut stats = vec![RecoveryStat::Connected(x)];
141+
if let Some(total) = y {
142+
stats.push(RecoveryStat::Total(Some(total)));
143+
}
144+
stats
145+
}),
130146
// This will ignore any line/field we don't care about
131-
attempt((stat_name(), token(':'), till_newline().skip(newline()))).map(|_| None),
147+
attempt((stat_name(), token(':'), till_newline().skip(newline()))).map(|_| vec![]),
132148
)))
133149
.map(|xs: Vec<_>| xs.into_iter().flatten().collect())
134150
}
@@ -176,6 +192,28 @@ where
176192
value: *value,
177193
})
178194
}
195+
RecoveryStat::RecoveryDuration(value) => {
196+
TargetStats::RecoveryDuration(TargetStat {
197+
kind,
198+
param: param.clone(),
199+
target: target.clone(),
200+
value: *value,
201+
})
202+
}
203+
RecoveryStat::TimeRemaining(value) => {
204+
TargetStats::RecoveryTimeRemaining(TargetStat {
205+
kind,
206+
param: param.clone(),
207+
target: target.clone(),
208+
value: *value,
209+
})
210+
}
211+
RecoveryStat::Total(value) => TargetStats::RecoveryTotalClients(TargetStat {
212+
kind,
213+
param: param.clone(),
214+
target: target.clone(),
215+
value: value.unwrap_or(0),
216+
}),
179217
})
180218
.collect()
181219
})
@@ -238,11 +276,11 @@ mod tests {
238276
#[test]
239277
fn test_clients_line() {
240278
let result = clients_line("completed_clients").parse("completed_clients: 3/7\n");
241-
assert_eq!(result, Ok((3, "")));
279+
assert_eq!(result, Ok(((3, Some(7)), "")));
242280
let result = clients_line("connected_clients").parse("connected_clients: 3/7\n");
243-
assert_eq!(result, Ok((3, "")));
281+
assert_eq!(result, Ok(((3, Some(7)), "")));
244282
let result = clients_line("completed_clients").parse("completed_clients: 3\n");
245-
assert_eq!(result, Ok((3, "")));
283+
assert_eq!(result, Ok(((3, None), "")));
246284
}
247285

248286
#[test]
@@ -259,7 +297,19 @@ IR: ENABLED
259297

260298
let (records, _): (Vec<_>, _) = target_recovery_stats().parse(x).unwrap();
261299

262-
insta::assert_debug_snapshot!(records);
300+
insta::assert_debug_snapshot!(records, @r"
301+
[
302+
Status(
303+
Complete,
304+
),
305+
RecoveryDuration(
306+
150,
307+
),
308+
Completed(
309+
4,
310+
),
311+
]
312+
");
263313
}
264314

265315
#[test]
@@ -275,6 +325,26 @@ completed_clients: 3
275325

276326
let (records, _): (Vec<_>, _) = target_recovery_stats().parse(x).unwrap();
277327

278-
insta::assert_debug_snapshot!(records);
328+
insta::assert_debug_snapshot!(records, @r"
329+
[
330+
Status(
331+
Recovering,
332+
),
333+
TimeRemaining(
334+
119,
335+
),
336+
Connected(
337+
3,
338+
),
339+
Total(
340+
Some(
341+
7,
342+
),
343+
),
344+
Completed(
345+
3,
346+
),
347+
]
348+
");
279349
}
280350
}

lustre-collector/src/snapshots/lustre_collector__recovery_status_parser__tests__multiple.snap

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
source: src/recovery_status_parser.rs
2+
source: lustre-collector/src/recovery_status_parser.rs
33
expression: records
44
---
55
[
@@ -17,6 +17,20 @@ expression: records
1717
},
1818
),
1919
),
20+
Target(
21+
RecoveryDuration(
22+
TargetStat {
23+
kind: Ost,
24+
param: Param(
25+
"recovery_status",
26+
),
27+
target: Target(
28+
"fs-OST0000",
29+
),
30+
value: 150,
31+
},
32+
),
33+
),
2034
Target(
2135
RecoveryCompletedClients(
2236
TargetStat {
@@ -45,6 +59,20 @@ expression: records
4559
},
4660
),
4761
),
62+
Target(
63+
RecoveryDuration(
64+
TargetStat {
65+
kind: Ost,
66+
param: Param(
67+
"recovery_status",
68+
),
69+
target: Target(
70+
"fs-OST0001",
71+
),
72+
value: 150,
73+
},
74+
),
75+
),
4876
Target(
4977
RecoveryCompletedClients(
5078
TargetStat {
@@ -73,6 +101,20 @@ expression: records
73101
},
74102
),
75103
),
104+
Target(
105+
RecoveryDuration(
106+
TargetStat {
107+
kind: Mdt,
108+
param: Param(
109+
"recovery_status",
110+
),
111+
target: Target(
112+
"fs-MDT0000",
113+
),
114+
value: 1760,
115+
},
116+
),
117+
),
76118
Target(
77119
RecoveryCompletedClients(
78120
TargetStat {

lustre-collector/src/snapshots/lustre_collector__recovery_status_parser__tests__multiple_recovering.snap

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
source: src/recovery_status_parser.rs
2+
source: lustre-collector/src/recovery_status_parser.rs
33
expression: records
44
---
55
[
@@ -17,6 +17,20 @@ expression: records
1717
},
1818
),
1919
),
20+
Target(
21+
RecoveryDuration(
22+
TargetStat {
23+
kind: Ost,
24+
param: Param(
25+
"recovery_status",
26+
),
27+
target: Target(
28+
"fs-OST0000",
29+
),
30+
value: 150,
31+
},
32+
),
33+
),
2034
Target(
2135
RecoveryCompletedClients(
2236
TargetStat {
@@ -45,6 +59,20 @@ expression: records
4559
},
4660
),
4761
),
62+
Target(
63+
RecoveryDuration(
64+
TargetStat {
65+
kind: Ost,
66+
param: Param(
67+
"recovery_status",
68+
),
69+
target: Target(
70+
"fs-OST0001",
71+
),
72+
value: 150,
73+
},
74+
),
75+
),
4876
Target(
4977
RecoveryCompletedClients(
5078
TargetStat {
@@ -73,6 +101,20 @@ expression: records
73101
},
74102
),
75103
),
104+
Target(
105+
RecoveryDuration(
106+
TargetStat {
107+
kind: Ost,
108+
param: Param(
109+
"recovery_status",
110+
),
111+
target: Target(
112+
"fs-OST0004",
113+
),
114+
value: 13,
115+
},
116+
),
117+
),
76118
Target(
77119
RecoveryCompletedClients(
78120
TargetStat {
@@ -101,6 +143,20 @@ expression: records
101143
},
102144
),
103145
),
146+
Target(
147+
RecoveryDuration(
148+
TargetStat {
149+
kind: Mdt,
150+
param: Param(
151+
"recovery_status",
152+
),
153+
target: Target(
154+
"fs-MDT0000",
155+
),
156+
value: 1760,
157+
},
158+
),
159+
),
104160
Target(
105161
RecoveryCompletedClients(
106162
TargetStat {
@@ -129,6 +185,20 @@ expression: records
129185
},
130186
),
131187
),
188+
Target(
189+
RecoveryTimeRemaining(
190+
TargetStat {
191+
kind: Mdt,
192+
param: Param(
193+
"recovery_status",
194+
),
195+
target: Target(
196+
"fs-MDT0002",
197+
),
198+
value: 119,
199+
},
200+
),
201+
),
132202
Target(
133203
RecoveryConnectedClients(
134204
TargetStat {
@@ -143,6 +213,20 @@ expression: records
143213
},
144214
),
145215
),
216+
Target(
217+
RecoveryTotalClients(
218+
TargetStat {
219+
kind: Mdt,
220+
param: Param(
221+
"recovery_status",
222+
),
223+
target: Target(
224+
"fs-MDT0002",
225+
),
226+
value: 7,
227+
},
228+
),
229+
),
146230
Target(
147231
RecoveryCompletedClients(
148232
TargetStat {

lustre-collector/src/snapshots/lustre_collector__recovery_status_parser__tests__target_recovery_stats.snap

Lines changed: 0 additions & 12 deletions
This file was deleted.

0 commit comments

Comments
 (0)