Skip to content

Commit d39f692

Browse files
breuhan and spoutn1k authored
Add support for lustre OSD cache statistics and update related tests (#97)
* Add support for lustre OSD cache statistics and update related tests
  Signed-off-by: Andy Breuhan <abreuhan@ddn.com>
* Delete failing tests
  Signed-off-by: Andy Breuhan <abreuhan@ddn.com>
* Add a new test to improve test coverage
  Signed-off-by: Andy Breuhan <abreuhan@ddn.com>
* Fix remove duplicate line
* Address review comments
* Add newlines
---------
Signed-off-by: Andy Breuhan <abreuhan@ddn.com>
Co-authored-by: Jean-Baptiste Skutnik <jb.skutnik@gmail.com>
1 parent 2c177ec commit d39f692

19 files changed

+505
-59
lines changed

lustre-collector/src/fixtures/osd.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
osd-ldiskfs.fs-OST0003.stats=
2+
snapshot_time 1750226041.107594239 secs.nsecs
3+
start_time 1750176720.831807230 secs.nsecs
4+
elapsed_time 49320.275787009 secs.nsecs
15
osd-ldiskfs.MGS.filesfree=32531
26
osd-ldiskfs.fs-MDT0000.filesfree=1885343
37
osd-ldiskfs.fs-OST0000.filesfree=39110

lustre-collector/src/fixtures/osd_active.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,23 @@
1+
osd-ldiskfs.ai400x2-OST0003.stats=
2+
snapshot_time 1750226041.107629473 secs.nsecs
3+
start_time 1750176720.390243856 secs.nsecs
4+
elapsed_time 49320.717385617 secs.nsecs
5+
get_page 375 samples [usecs] 1 23 2956 28656
6+
cache_access 297 samples [pages] 830 1024 303934
7+
cache_hit 0 samples [pages] 0 0 0
8+
cache_miss 297 samples [pages] 830 1024 303934
19
osd-ldiskfs.MGS.filesfree=130871
210
osd-ldiskfs.ai400x2-MDT0000.filesfree=97713887
311
osd-ldiskfs.ai400x2-OST0000.filesfree=1073740846
412
osd-ldiskfs.ai400x2-OST0001.filesfree=1073740847
13+
osd-ldiskfs.ai400x2-OST0001.stats=
14+
snapshot_time 1750226041.107629473 secs.nsecs
15+
start_time 1750176720.390243856 secs.nsecs
16+
elapsed_time 49320.717385617 secs.nsecs
17+
get_page 375 samples [usecs] 1 23 2956 28656
18+
cache_access 297 samples [pages] 830 1024 303934
19+
cache_hit 123 samples [pages] 0 1024 303934
20+
cache_miss 297 samples [pages] 830 1024 303934
521
osd-ldiskfs.MGS.filestotal=131072
622
osd-ldiskfs.ai400x2-MDT0000.filestotal=97714176
723
osd-ldiskfs.ai400x2-OST0000.filestotal=1073741824

lustre-collector/src/osd_parser.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ use crate::{
77
base_parsers::{digits, param, period, target, till_newline, till_period},
88
brw_stats_parser::brw_stats,
99
quota::quota_parser::quota_stats_osd,
10-
types::{BrwStats, Param, Record, Target, TargetStat, TargetStats, TargetVariant},
10+
stats_parser::stats,
11+
types::{BrwStats, Param, Record, Stat, Target, TargetStat, TargetStats, TargetVariant},
1112
};
1213
use combine::{
1314
Parser, attempt, choice,
@@ -16,6 +17,7 @@ use combine::{
1617
stream::{Stream, StreamErrorFor},
1718
};
1819

20+
pub(crate) const OSD: &str = "osd";
1921
pub(crate) const FILES_FREE: &str = "filesfree";
2022
pub(crate) const FILES_TOTAL: &str = "filestotal";
2123
pub(crate) const KBYTES_AVAIL: &str = "kbytesavail";
@@ -24,13 +26,15 @@ pub(crate) const KBYTES_TOTAL: &str = "kbytestotal";
2426
pub(crate) const FS_TYPE: &str = "fstype";
2527

2628
pub(crate) const BRW_STATS: &str = "brw_stats";
29+
pub(crate) const STATS: &str = "stats";
2730

2831
pub(crate) const QUOTA_ACCT_GRP: &str = "quota_slave.acct_group";
2932
pub(crate) const QUOTA_ACCT_USR: &str = "quota_slave.acct_user";
3033
pub(crate) const QUOTA_ACCT_PRJ: &str = "quota_slave.acct_project";
3134

3235
pub(crate) fn params() -> Vec<String> {
3336
vec![
37+
format!("osd-*.*.{STATS}"),
3438
format!("osd-*.*.{FILES_FREE}"),
3539
format!("osd-*.*.{FILES_TOTAL}"),
3640
format!("osd-*.*.{FS_TYPE}"),
@@ -60,6 +64,8 @@ enum OsdStat {
6064
KBytesTotal(u64),
6165
BrwStats(Vec<BrwStats>),
6266
QuotaStats(QuotaStatsOsd),
67+
/// Generic OSD statistics (performance counters, operation counts)
68+
Stats(Vec<Stat>),
6369
}
6470

6571
fn target_and_variant<I>() -> impl Parser<I, Output = (Target, TargetVariant)>
@@ -88,6 +94,7 @@ where
8894
I::Error: ParseError<I::Token, I::Range, I::Position>,
8995
{
9096
choice((
97+
(param(STATS), stats().map(OsdStat::Stats)),
9198
(param(BRW_STATS), brw_stats().map(OsdStat::BrwStats)),
9299
(
93100
param(FILES_FREE),
@@ -153,6 +160,12 @@ where
153160
{
154161
(target_and_variant(), osd_stat())
155162
.map(|((target, kind), (param, stat))| match stat {
163+
OsdStat::Stats(value) => TargetStats::Stats(TargetStat {
164+
kind,
165+
target,
166+
param,
167+
value,
168+
}),
156169
OsdStat::FilesFree(value) => TargetStats::FilesFree(TargetStat {
157170
kind,
158171
target,

lustre-collector/src/snapshots/lustre_collector__osd_parser__tests__osd_active_stats.snap

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,86 @@
11
---
2-
source: src/osd_parser.rs
2+
source: lustre-collector/src/osd_parser.rs
33
expression: result
44
---
55
(
66
[
7+
Target(
8+
Stats(
9+
TargetStat {
10+
kind: Ost,
11+
param: Param(
12+
"stats",
13+
),
14+
target: Target(
15+
"ai400x2-OST0003",
16+
),
17+
value: [
18+
Stat {
19+
name: "get_page",
20+
units: "usecs",
21+
samples: 375,
22+
min: Some(
23+
1,
24+
),
25+
max: Some(
26+
23,
27+
),
28+
sum: Some(
29+
2956,
30+
),
31+
sumsquare: Some(
32+
28656,
33+
),
34+
},
35+
Stat {
36+
name: "cache_access",
37+
units: "pages",
38+
samples: 297,
39+
min: Some(
40+
830,
41+
),
42+
max: Some(
43+
1024,
44+
),
45+
sum: Some(
46+
303934,
47+
),
48+
sumsquare: None,
49+
},
50+
Stat {
51+
name: "cache_hit",
52+
units: "pages",
53+
samples: 0,
54+
min: Some(
55+
0,
56+
),
57+
max: Some(
58+
0,
59+
),
60+
sum: Some(
61+
0,
62+
),
63+
sumsquare: None,
64+
},
65+
Stat {
66+
name: "cache_miss",
67+
units: "pages",
68+
samples: 297,
69+
min: Some(
70+
830,
71+
),
72+
max: Some(
73+
1024,
74+
),
75+
sum: Some(
76+
303934,
77+
),
78+
sumsquare: None,
79+
},
80+
],
81+
},
82+
),
83+
),
784
Target(
885
FilesFree(
986
TargetStat {
@@ -60,6 +137,83 @@ expression: result
60137
},
61138
),
62139
),
140+
Target(
141+
Stats(
142+
TargetStat {
143+
kind: Ost,
144+
param: Param(
145+
"stats",
146+
),
147+
target: Target(
148+
"ai400x2-OST0001",
149+
),
150+
value: [
151+
Stat {
152+
name: "get_page",
153+
units: "usecs",
154+
samples: 375,
155+
min: Some(
156+
1,
157+
),
158+
max: Some(
159+
23,
160+
),
161+
sum: Some(
162+
2956,
163+
),
164+
sumsquare: Some(
165+
28656,
166+
),
167+
},
168+
Stat {
169+
name: "cache_access",
170+
units: "pages",
171+
samples: 297,
172+
min: Some(
173+
830,
174+
),
175+
max: Some(
176+
1024,
177+
),
178+
sum: Some(
179+
303934,
180+
),
181+
sumsquare: None,
182+
},
183+
Stat {
184+
name: "cache_hit",
185+
units: "pages",
186+
samples: 123,
187+
min: Some(
188+
0,
189+
),
190+
max: Some(
191+
1024,
192+
),
193+
sum: Some(
194+
303934,
195+
),
196+
sumsquare: None,
197+
},
198+
Stat {
199+
name: "cache_miss",
200+
units: "pages",
201+
samples: 297,
202+
min: Some(
203+
830,
204+
),
205+
max: Some(
206+
1024,
207+
),
208+
sum: Some(
209+
303934,
210+
),
211+
sumsquare: None,
212+
},
213+
],
214+
},
215+
),
216+
),
63217
Target(
64218
FilesTotal(
65219
TargetStat {

lustre-collector/src/snapshots/lustre_collector__osd_parser__tests__osd_stats.snap

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,23 @@
11
---
2-
source: src/osd_parser.rs
2+
source: lustre-collector/src/osd_parser.rs
33
expression: result
44
---
55
(
66
[
7+
Target(
8+
Stats(
9+
TargetStat {
10+
kind: Ost,
11+
param: Param(
12+
"stats",
13+
),
14+
target: Target(
15+
"fs-OST0003",
16+
),
17+
value: [],
18+
},
19+
),
20+
),
721
Target(
822
FilesFree(
923
TargetStat {

lustre-collector/src/snapshots/lustre_collector__parser__tests__params.snap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ expression: params()
88
"lnet_memused",
99
"health_check",
1010
"mdt.*.exports.*.uuid",
11+
"osd-*.*.stats",
1112
"osd-*.*.filesfree",
1213
"osd-*.*.filestotal",
1314
"osd-*.*.fstype",

lustre-collector/src/snapshots/lustre_collector__stats_parser__tests__stats.snap

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
source: src/stats_parser.rs
2+
source: lustre-collector/src/stats_parser.rs
33
expression: result
44
---
55
(
@@ -109,6 +109,68 @@ expression: result
109109
sum: None,
110110
sumsquare: None,
111111
},
112+
Stat {
113+
name: "get_page",
114+
units: "usecs",
115+
samples: 13,
116+
min: Some(
117+
0,
118+
),
119+
max: Some(
120+
3,
121+
),
122+
sum: Some(
123+
6,
124+
),
125+
sumsquare: Some(
126+
18,
127+
),
128+
},
129+
Stat {
130+
name: "cache_access",
131+
units: "pages",
132+
samples: 4,
133+
min: Some(
134+
1,
135+
),
136+
max: Some(
137+
25,
138+
),
139+
sum: Some(
140+
52,
141+
),
142+
sumsquare: None,
143+
},
144+
Stat {
145+
name: "cache_hit",
146+
units: "pages",
147+
samples: 4,
148+
min: Some(
149+
1,
150+
),
151+
max: Some(
152+
25,
153+
),
154+
sum: Some(
155+
52,
156+
),
157+
sumsquare: None,
158+
},
159+
Stat {
160+
name: "many_credits",
161+
units: "reqs",
162+
samples: 1,
163+
min: Some(
164+
1,
165+
),
166+
max: Some(
167+
1,
168+
),
169+
sum: Some(
170+
1,
171+
),
172+
sumsquare: None,
173+
},
112174
],
113175
"",
114176
)

lustre-collector/src/snapshots/lustre_collector__tests__params.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
source: lustre-collector/src/lib.rs
33
expression: "xs.join(\" \")"
44
---
5-
memused memused_max lnet_memused health_check mdt.*.exports.*.uuid osd-*.*.filesfree osd-*.*.filestotal osd-*.*.fstype osd-*.*.kbytesavail osd-*.*.kbytesfree osd-*.*.kbytestotal osd-*.*.brw_stats osd-*.*.quota_slave.acct_group osd-*.*.quota_slave.acct_user osd-*.*.quota_slave.acct_project mgs.*.mgs.stats mgs.*.mgs.threads_max mgs.*.mgs.threads_min mgs.*.mgs.threads_started mgs.*.num_exports obdfilter.*OST*.stats obdfilter.*OST*.num_exports obdfilter.*OST*.tot_dirty obdfilter.*OST*.tot_granted obdfilter.*OST*.tot_pending obdfilter.*OST*.exports.*.stats ost.OSS.ost.stats ost.OSS.ost_io.stats ost.OSS.ost_create.stats ost.OSS.ost_out.stats ost.OSS.ost_seq.stats mds.MDS.mdt.stats mds.MDS.mdt_fld.stats mds.MDS.mdt_io.stats mds.MDS.mdt_out.stats mds.MDS.mdt_readpage.stats mds.MDS.mdt_seqm.stats mds.MDS.mdt_seqs.stats mds.MDS.mdt_setattr.stats mdt.*.md_stats mdt.*MDT*.num_exports mdt.*MDT*.exports.*.stats ldlm.namespaces.{mdt-,filter-}*.contended_locks ldlm.namespaces.{mdt-,filter-}*.contention_seconds ldlm.namespaces.{mdt-,filter-}*.ctime_age_limit ldlm.namespaces.{mdt-,filter-}*.early_lock_cancel ldlm.namespaces.{mdt-,filter-}*.lock_count ldlm.namespaces.{mdt-,filter-}*.lock_timeouts ldlm.namespaces.{mdt-,filter-}*.lock_unused_count ldlm.namespaces.{mdt-,filter-}*.lru_max_age ldlm.namespaces.{mdt-,filter-}*.lru_size ldlm.namespaces.{mdt-,filter-}*.max_nolock_bytes ldlm.namespaces.{mdt-,filter-}*.max_parallel_ast ldlm.namespaces.{mdt-,filter-}*.resource_count ldlm.services.ldlm_canceld.stats ldlm.services.ldlm_cbd.stats llite.*.stats mdd.*.changelog_users qmt.*.*.glb-usr qmt.*.*.glb-prj qmt.*.*.glb-grp nodemap.*.dt_stats nodemap.*.md_stats
5+
memused memused_max lnet_memused health_check mdt.*.exports.*.uuid osd-*.*.stats osd-*.*.filesfree osd-*.*.filestotal osd-*.*.fstype osd-*.*.kbytesavail osd-*.*.kbytesfree osd-*.*.kbytestotal osd-*.*.brw_stats osd-*.*.quota_slave.acct_group osd-*.*.quota_slave.acct_user osd-*.*.quota_slave.acct_project mgs.*.mgs.stats mgs.*.mgs.threads_max mgs.*.mgs.threads_min mgs.*.mgs.threads_started mgs.*.num_exports obdfilter.*OST*.stats obdfilter.*OST*.num_exports obdfilter.*OST*.tot_dirty obdfilter.*OST*.tot_granted obdfilter.*OST*.tot_pending obdfilter.*OST*.exports.*.stats ost.OSS.ost.stats ost.OSS.ost_io.stats ost.OSS.ost_create.stats ost.OSS.ost_out.stats ost.OSS.ost_seq.stats mds.MDS.mdt.stats mds.MDS.mdt_fld.stats mds.MDS.mdt_io.stats mds.MDS.mdt_out.stats mds.MDS.mdt_readpage.stats mds.MDS.mdt_seqm.stats mds.MDS.mdt_seqs.stats mds.MDS.mdt_setattr.stats mdt.*.md_stats mdt.*MDT*.num_exports mdt.*MDT*.exports.*.stats ldlm.namespaces.{mdt-,filter-}*.contended_locks ldlm.namespaces.{mdt-,filter-}*.contention_seconds ldlm.namespaces.{mdt-,filter-}*.ctime_age_limit ldlm.namespaces.{mdt-,filter-}*.early_lock_cancel ldlm.namespaces.{mdt-,filter-}*.lock_count ldlm.namespaces.{mdt-,filter-}*.lock_timeouts ldlm.namespaces.{mdt-,filter-}*.lock_unused_count ldlm.namespaces.{mdt-,filter-}*.lru_max_age ldlm.namespaces.{mdt-,filter-}*.lru_size ldlm.namespaces.{mdt-,filter-}*.max_nolock_bytes ldlm.namespaces.{mdt-,filter-}*.max_parallel_ast ldlm.namespaces.{mdt-,filter-}*.resource_count ldlm.services.ldlm_canceld.stats ldlm.services.ldlm_cbd.stats llite.*.stats mdd.*.changelog_users qmt.*.*.glb-usr qmt.*.*.glb-prj qmt.*.*.glb-grp nodemap.*.dt_stats nodemap.*.md_stats

0 commit comments

Comments
 (0)