Skip to content

Commit d92c778

Browse files
authored
[nexus] allow abbreviating ereport 'class' as 'k' (#8804)
This was one of @cbiffle's ideas to save a few bytes of CBOR. Since we expect to encode a "class" key for every ereport, shortening the fields that are included in every message has a meaningful impact on how much we can buffer. Also, loss records (which are generated when the SP restarts or when ereports don't fit in the buffer) lack a class string, because the presence of a top-level "lost" key can be used to identify them. So, we should handle that here, as well. Also includes some SP sim improvements for testing this.
1 parent dfac975 commit d92c778

File tree

5 files changed

+308
-101
lines changed

5 files changed

+308
-101
lines changed

gateway-test-utils/configs/sp_sim_config.test.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ restart_id = "0d3e464a-666e-4687-976f-90e31238be8b"
5555
task_name = "task_thermal_server"
5656
task_gen = 1
5757
uptime = 1235
58-
class = "oxide.sidecar.thermal.sensor_read_error"
58+
k = "oxide.sidecar.thermal.sensor_read_error"
5959
sensor = { id = "dev-1", device = "fake-tmp-sensor", location = "South", presence = "Failed" }
6060
error = "DeviceError"
6161

@@ -209,14 +209,14 @@ restart_id = "af1ebf85-36ba-4c31-bbec-b9825d6d9d8b"
209209
task_name = "task_apollo_server"
210210
task_gen = 13
211211
uptime = 1233
212-
class = "gov.nasa.apollo.o2_tanks.stir.begin"
212+
k = "gov.nasa.apollo.o2_tanks.stir.begin"
213213
message = "stirring the tanks"
214214

215215
[[simulated_sps.gimlet.ereport_config.ereports]]
216216
task_name = "drv_ae35_server"
217217
task_gen = 1
218218
uptime = 1234
219-
class = "io.discovery.ae35.fault"
219+
k = "io.discovery.ae35.fault"
220220
message = "i've just picked up a fault in the AE-35 unit"
221221
de = { scheme = "fmd", authority = { product-id = "HAL-9000-series computer", server-id = "HAL 9000"}, mod-name = "ae35-diagnosis" }
222222
hours_to_failure = 72
@@ -225,22 +225,22 @@ hours_to_failure = 72
225225
task_name = "task_apollo_server"
226226
task_gen = 13
227227
uptime = 1237
228-
class = "gov.nasa.apollo.fault"
228+
k = "gov.nasa.apollo.fault"
229229
message = "houston, we have a problem"
230230
crew = ["Lovell", "Swigert", "Haise"]
231231

232232
[[simulated_sps.gimlet.ereport_config.ereports]]
233233
task_name = "drv_thingy_server"
234234
task_gen = 2
235235
uptime = 1240
236-
class = "flagrant_error"
236+
k = "flagrant_error"
237237
computer = false
238238

239239
[[simulated_sps.gimlet.ereport_config.ereports]]
240240
task_name = "task_latex_server"
241241
task_gen = 1
242242
uptime = 1245
243-
class = "overfull_hbox"
243+
k = "overfull_hbox"
244244
badness = 10000
245245

246246
[[simulated_sps.gimlet]]

gateway/tests/integration_tests/ereports.rs

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,20 @@ mod sled0 {
7272
"af1ebf85-36ba-4c31-bbec-b9825d6d9d8b".parse().expect("is a valid UUID")
7373
});
7474

75+
def_ereport! {
76+
LOSS: {
77+
"baseboard_part_number": "SimGimletSp",
78+
"baseboard_serial_number": "SimGimlet00",
79+
"hubris_archive_id": "ffffffff",
80+
"hubris_version": "0.0.2",
81+
"hubris_task_name": "packrat",
82+
"hubris_task_gen": 0,
83+
"hubris_uptime_ms": 666,
84+
"ereport_message_version": 0,
85+
"lost": null,
86+
}
87+
}
88+
7589
def_ereport! {
7690
EREPORT_1: {
7791
"baseboard_part_number": "SimGimletSp",
@@ -82,7 +96,7 @@ mod sled0 {
8296
"hubris_task_gen": 13,
8397
"hubris_uptime_ms": 1233,
8498
"ereport_message_version": 0,
85-
"class": "gov.nasa.apollo.o2_tanks.stir.begin",
99+
"k": "gov.nasa.apollo.o2_tanks.stir.begin",
86100
"message": "stirring the tanks",
87101
}
88102
}
@@ -96,7 +110,7 @@ mod sled0 {
96110
"hubris_task_gen": 1,
97111
"hubris_uptime_ms": 1234,
98112
"ereport_message_version": 0,
99-
"class": "io.discovery.ae35.fault",
113+
"k": "io.discovery.ae35.fault",
100114
"message": "i've just picked up a fault in the AE-35 unit",
101115
"de": {
102116
"scheme": "fmd",
@@ -119,7 +133,7 @@ mod sled0 {
119133
"hubris_task_gen": 13,
120134
"hubris_uptime_ms": 1237,
121135
"ereport_message_version": 0,
122-
"class": "gov.nasa.apollo.fault",
136+
"k": "gov.nasa.apollo.fault",
123137
"message": "houston, we have a problem",
124138
"crew": [
125139
"Lovell",
@@ -139,7 +153,7 @@ mod sled0 {
139153
"hubris_task_gen": 2,
140154
"hubris_uptime_ms": 1240,
141155
"ereport_message_version": 0,
142-
"class": "flagrant_error",
156+
"k": "flagrant_error",
143157
"computer": false,
144158
}
145159
}
@@ -154,7 +168,7 @@ mod sled0 {
154168
"hubris_task_gen": 1,
155169
"hubris_uptime_ms": 1245,
156170
"ereport_message_version": 0,
157-
"class": "overfull_hbox",
171+
"k": "overfull_hbox",
158172
"badness": 10000,
159173
}
160174
}
@@ -166,6 +180,20 @@ mod sled1 {
166180
"55e30cc7-a109-492f-aca9-735ed725df3c".parse().expect("is a valid UUID")
167181
});
168182

183+
def_ereport! {
184+
LOSS: {
185+
"baseboard_part_number": "SimGimletSp",
186+
"baseboard_serial_number": "SimGimlet01",
187+
"hubris_archive_id": "ffffffff",
188+
"hubris_version": "0.0.2",
189+
"hubris_task_name": "packrat",
190+
"hubris_task_gen": 0,
191+
"hubris_uptime_ms": 666,
192+
"ereport_message_version": 0,
193+
"lost": null,
194+
}
195+
}
196+
169197
def_ereport! {
170198
EREPORT_1: {
171199
"baseboard_part_number": "SimGimletSp",
@@ -233,9 +261,14 @@ async fn ereports_basic() {
233261

234262
assert_eq!(restart_id.as_untyped_uuid(), &*sled1::RESTART_0);
235263
let reports = reports.items;
236-
assert_eq!(reports.len(), 1, "expected 1 ereport, found: {:#?}", reports);
264+
assert_eq!(reports.len(), 2, "expected 2 ereports, found: {:#?}", reports);
265+
237266
let report = &reports[0];
238267
assert_eq!(report.ena, ereport_types::Ena(1));
268+
assert_eq!(report.data, *sled1::LOSS);
269+
270+
let report = &reports[1];
271+
assert_eq!(report.ena, ereport_types::Ena(2));
239272
assert_eq!(report.data, *sled1::EREPORT_1);
240273

241274
testctx.teardown().await;
@@ -254,21 +287,26 @@ async fn ereports_limit() {
254287
restart_id: Uuid::new_v4(),
255288
start_ena: 0,
256289
committed_ena: None,
257-
limit: 2
290+
limit: 3
258291
}
259292
.response(client)
260293
.await
261294
);
262295

263296
assert_eq!(restart_id.as_untyped_uuid(), &*sled0::RESTART_0);
264297
let reports = reports.items;
265-
assert_eq!(reports.len(), 2, "expected 2 ereports, found: {:#?}", reports);
298+
assert_eq!(reports.len(), 3, "expected 3 ereports, found: {:#?}", reports);
299+
266300
let report = &reports[0];
267301
assert_eq!(report.ena, ereport_types::Ena(1));
268-
assert_eq!(report.data, *sled0::EREPORT_1);
302+
assert_eq!(report.data, *sled0::LOSS);
269303

270304
let report = &reports[1];
271305
assert_eq!(report.ena, ereport_types::Ena(2));
306+
assert_eq!(report.data, *sled0::EREPORT_1);
307+
308+
let report = &reports[2];
309+
assert_eq!(report.ena, ereport_types::Ena(3));
272310
assert_eq!(report.data, *sled0::EREPORT_2);
273311

274312
let ereport_types::Ereports { restart_id, reports } = dbg!(
@@ -288,11 +326,11 @@ async fn ereports_limit() {
288326
assert_eq!(reports.len(), 2, "expected 2 ereports, found: {:#?}", reports);
289327
let report = &reports[0];
290328
assert_eq!(report.ena, ereport_types::Ena(3));
291-
assert_eq!(report.data, *sled0::EREPORT_3);
329+
assert_eq!(report.data, *sled0::EREPORT_2);
292330

293331
let report = &reports[1];
294332
assert_eq!(report.ena, ereport_types::Ena(4));
295-
assert_eq!(report.data, *sled0::EREPORT_4);
333+
assert_eq!(report.data, *sled0::EREPORT_3);
296334

297335
testctx.teardown().await;
298336
}
@@ -323,11 +361,11 @@ async fn ereports_commit() {
323361
assert_eq!(reports.len(), 2, "expected 2 ereports, found: {:#?}", reports);
324362
let report = &reports[0];
325363
assert_eq!(report.ena, ereport_types::Ena(1));
326-
assert_eq!(report.data, *sled0::EREPORT_1);
364+
assert_eq!(report.data, *sled0::LOSS);
327365

328366
let report = &reports[1];
329367
assert_eq!(report.ena, ereport_types::Ena(2));
330-
assert_eq!(report.data, *sled0::EREPORT_2);
368+
assert_eq!(report.data, *sled0::EREPORT_1);
331369

332370
// Now, send a request with a committed ENA *and* a matching restart ID.
333371
let ereport_types::Ereports { restart_id, reports } = dbg!(
@@ -347,11 +385,11 @@ async fn ereports_commit() {
347385
assert_eq!(reports.len(), 2, "expected 2 ereports, found: {:#?}", reports);
348386
let report = &reports[0];
349387
assert_eq!(report.ena, ereport_types::Ena(3));
350-
assert_eq!(report.data, *sled0::EREPORT_3);
388+
assert_eq!(report.data, *sled0::EREPORT_2);
351389

352390
let report = &reports[1];
353391
assert_eq!(report.ena, ereport_types::Ena(4));
354-
assert_eq!(report.data, *sled0::EREPORT_4);
392+
assert_eq!(report.data, *sled0::EREPORT_3);
355393

356394
// Even if the start ENA of a subsequent request is 0, we shouldn't see any
357395
// ereports with ENAs lower than the committed ENA.
@@ -369,17 +407,21 @@ async fn ereports_commit() {
369407

370408
assert_eq!(restart_id.as_untyped_uuid(), &*sled0::RESTART_0);
371409
let reports = reports.items;
372-
assert_eq!(reports.len(), 3, "expected 3 ereports, found: {:#?}", reports);
410+
assert_eq!(reports.len(), 4, "expected 4 ereports, found: {:#?}", reports);
373411
let report = &reports[0];
374412
assert_eq!(report.ena, ereport_types::Ena(3));
375-
assert_eq!(report.data, *sled0::EREPORT_3);
413+
assert_eq!(report.data, *sled0::EREPORT_2);
376414

377415
let report = &reports[1];
378416
assert_eq!(report.ena, ereport_types::Ena(4));
379-
assert_eq!(report.data, *sled0::EREPORT_4);
417+
assert_eq!(report.data, *sled0::EREPORT_3);
380418

381419
let report = &reports[2];
382420
assert_eq!(report.ena, ereport_types::Ena(5));
421+
assert_eq!(report.data, *sled0::EREPORT_4);
422+
423+
let report = &reports[3];
424+
assert_eq!(report.ena, ereport_types::Ena(6));
383425
assert_eq!(report.data, *sled0::EREPORT_5);
384426

385427
testctx.teardown().await;

0 commit comments

Comments
 (0)