diff --git a/Cargo.lock b/Cargo.lock index b094c9ff3..dd63387fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1115,9 +1115,11 @@ dependencies = [ "drv-spartan7-loader-api", "drv-spi-api", "drv-stm32xx-sys-api", + "fixedstr", "gnarle", "idol", "idol-runtime", + "microcbor", "num-traits", "pmbus", "ringbuf", diff --git a/app/gimlet/base.toml b/app/gimlet/base.toml index b7133df17..0ee610265 100644 --- a/app/gimlet/base.toml +++ b/app/gimlet/base.toml @@ -163,7 +163,7 @@ name = "drv-gimlet-seq-server" features = ["h753"] priority = 4 max-sizes = {flash = 131072, ram = 16384 } -stacksize = 2600 +stacksize = 2912 start = true task-slots = ["sys", "i2c_driver", {spi_driver = "spi2_driver"}, "hf", "jefe", "packrat"] notifications = ["timer", "vcore"] diff --git a/drv/cosmo-seq-server/Cargo.toml b/drv/cosmo-seq-server/Cargo.toml index 9e96d4628..f476111b7 100644 --- a/drv/cosmo-seq-server/Cargo.toml +++ b/drv/cosmo-seq-server/Cargo.toml @@ -16,11 +16,13 @@ drv-packrat-vpd-loader = { path = "../packrat-vpd-loader" } drv-spartan7-loader-api = { path = "../spartan7-loader-api" } drv-spi-api = { path = "../spi-api" } drv-stm32xx-sys-api = { path = "../stm32xx-sys-api" } +fixedstr = { path = "../../lib/fixedstr", features = ["microcbor"] } gnarle = { path = "../../lib/gnarle" } ringbuf = { path = "../../lib/ringbuf" } +microcbor = { path = "../../lib/microcbor" } userlib = { path = "../../sys/userlib", features = ["panic-messages"] } task-jefe-api = { path = "../../task/jefe-api" } -task-packrat-api = { path = "../../task/packrat-api", features = ["serde"] } +task-packrat-api = { path = "../../task/packrat-api", features = ["microcbor"] } static-cell = { path = "../../lib/static-cell" } cfg-if = { workspace = true } diff --git a/drv/cosmo-seq-server/src/main.rs b/drv/cosmo-seq-server/src/main.rs index 609e63ced..f6c7d13c0 100644 --- a/drv/cosmo-seq-server/src/main.rs +++ b/drv/cosmo-seq-server/src/main.rs @@ -10,21 +10,21 @@ use drv_cpu_seq_api::{ PowerState, SeqError as 
CpuSeqError, StateChangeReason, Transition, }; +use drv_hf_api::HostFlash; use drv_ice40_spi_program as ice40; use drv_packrat_vpd_loader::{read_vpd_and_load_packrat, Packrat}; use drv_spartan7_loader_api::Spartan7Loader; use drv_spi_api::{SpiDevice, SpiServer}; use drv_stm32xx_sys_api::{self as sys_api, Sys}; +use fixedstr::FixedStr; use idol_runtime::{NotificationHandler, RequestError}; +use ringbuf::{counted_ringbuf, ringbuf_entry, Count}; use task_jefe_api::Jefe; use userlib::{ hl, set_timer_relative, sys_get_timer, sys_recv_notification, task_slot, RecvMessage, }; -use drv_hf_api::HostFlash; -use ringbuf::{counted_ringbuf, ringbuf_entry, Count}; - include!(concat!(env!("OUT_DIR"), "/i2c_config.rs")); mod vcore; @@ -101,6 +101,14 @@ enum Trace { now: u64, }, UnexpectedInterrupt, + + EreportSent(#[count(children)] EreportClass, usize), + EreportLost( + #[count(children)] EreportClass, + usize, + task_packrat_api::EreportWriteError, + ), + EreportTooBig(#[count(children)] EreportClass), } counted_ringbuf!(Trace, 128, Trace::None); @@ -378,12 +386,18 @@ struct ServerImpl { seq: fmc_sequencer::Sequencer, espi: fmc_periph::espi::Espi, vcore: VCore, + packrat: Packrat, /// Static buffer for encoding ereports. This is a static so that we don't /// have it on the stack when encoding ereports. 
ereport_buf: &'static mut [u8; EREPORT_BUF_LEN], } -const EREPORT_BUF_LEN: usize = 256; +const EREPORT_BUF_LEN: usize = microcbor::max_cbor_len_for![ + Ereport, + Ereport, + // For FPGA MAPO/SMERR ereports + Ereport<&'static SeqFpgaRefdes>, +]; impl ServerImpl { fn new( @@ -421,7 +435,8 @@ impl ServerImpl { hf: HostFlash::from(HF.get_task_id()), seq, espi, - vcore: VCore::new(I2C.get_task_id(), packrat), + vcore: VCore::new(I2C.get_task_id()), + packrat, ereport_buf, } } @@ -537,26 +552,42 @@ impl ServerImpl { }); // From sp5-mobo-guide-56870_1.1.pdf table 72 - match (coretype0, coretype1, coretype2) { + let coretype_ok = match (coretype0, coretype1, coretype2) { // These correspond to Type-2 and Type-3 - (true, false, true) | (true, false, false) => (), + (true, false, true) | (true, false, false) => true, // Reject all other combos and return to A0 - _ => { - self.seq.power_ctrl.modify(|m| m.set_a0_en(false)); - return Err(CpuSeqError::UnrecognizedCPU); - } + _ => false, }; // From sp5-mobo-guide-56870_1.1.pdf table 73 - match (sp5r1, sp5r2, sp5r3, sp5r4) { + let sp5r_ok = match (sp5r1, sp5r2, sp5r3, sp5r4) { // There is only combo we accept here - (true, false, false, false) => (), + (true, false, false, false) => true, // Reject all other combos and return to A0 - _ => { - self.seq.power_ctrl.modify(|m| m.set_a0_en(false)); - return Err(CpuSeqError::UnrecognizedCPU); - } + _ => false, }; + + if !(coretype_ok && sp5r_ok) { + // Looks weird! + self.seq.power_ctrl.modify(|m| m.set_a0_en(false)); + let ereport = Ereport { + class: EreportClass::UnrecognizedCPU, + version: 0, + report: UnrecognizedCPU { + refdes: &HOST_CPU_REFDES, + coretype0, + coretype1, + coretype2, + sp5r1, + sp5r2, + sp5r3, + sp5r4, + }, + }; + deliver_ereport(&ereport, &self.packrat, self.ereport_buf); + return Err(CpuSeqError::UnrecognizedCPU); + } + // Turn on the voltage regulator undervolt alerts. 
self.enable_sequencer_interrupts(); @@ -758,8 +789,12 @@ impl ServerImpl { vddcr_cpu0: ifr.pwr_cont1_to_fpga1_alert, vddcr_cpu1: ifr.pwr_cont2_to_fpga1_alert, }; - self.vcore - .handle_pmbus_alert(which_rails, now, self.ereport_buf); + self.vcore.handle_pmbus_alert( + which_rails, + now, + &self.packrat, + self.ereport_buf, + ); // We need not instruct the sequencer to reset. PMBus alerts from // the RAA229620As are divided into two categories, "warnings" and @@ -787,7 +822,17 @@ impl ServerImpl { self.seq.ifr.modify(|h| h.set_thermtrip(false)); ringbuf_entry!(Trace::Thermtrip); action = InternalAction::ThermTrip; - // Great place for an ereport? + let ereport = Ereport { + class: EreportClass::Thermtrip, + version: 0, + report: &HOST_CPU_REFDES, + // TODO(eliza): eventually, it would be nice to include sequencer + // state registers here, however, we would need to modify the + // `fpga_regmap` codegen to let us get the raw bits out (since + // encoding the `...View` structs as CBOR uses a lot more bytes for + // field names and 8-bit `bool`s...) I'll do this eventually... + }; + deliver_ereport(&ereport, &self.packrat, self.ereport_buf); } if ifr.a0mapo { @@ -795,14 +840,36 @@ impl ServerImpl { self.seq.ifr.modify(|h| h.set_a0mapo(false)); ringbuf_entry!(Trace::A0MapoInterrupt); action = InternalAction::Mapo; - // Great place for an ereport? + + let ereport = Ereport { + class: EreportClass::A0Mapo, + version: 0, + report: &SEQ_FPGA_REFDES, + // TODO(eliza): eventually, it would be nice to include sequencer + // state registers here, however, we would need to modify the + // `fpga_regmap` codegen to let us get the raw bits out (since + // encoding the `...View` structs as CBOR uses a lot more bytes for + // field names and 8-bit `bool`s...) I'll do this eventually... 
+ }; + deliver_ereport(&ereport, &self.packrat, self.ereport_buf); } if ifr.smerr_assert { self.seq.ifr.modify(|h| h.set_smerr_assert(false)); ringbuf_entry!(Trace::SmerrInterrupt); action = InternalAction::Smerr; - // Great place for an ereport? + + let ereport = Ereport { + class: EreportClass::Smerr, + version: 0, + report: &SEQ_FPGA_REFDES, + // TODO(eliza): eventually, it would be nice to include sequencer + // state registers here, however, we would need to modify the + // `fpga_regmap` codegen to let us get the raw bits out (since + // encoding the `...View` structs as CBOR uses a lot more bytes for + // field names and 8-bit `bool`s...) I'll do this eventually... + }; + deliver_ereport(&ereport, &self.packrat, self.ereport_buf); } // Fan Fault is unconnected // NIC MAPO is unconnected @@ -959,6 +1026,80 @@ impl NotificationHandler for ServerImpl { //////////////////////////////////////////////////////////////////////////////// +#[derive(Eq, PartialEq, Copy, Clone, microcbor::Encode, counters::Count)] +pub(crate) enum EreportClass { + // + // Interrupts + // + #[cbor(rename = "hw.cpu.thermtrip")] + Thermtrip, + #[cbor(rename = "hw.seq.smerr")] + Smerr, + #[cbor(rename = "hw.seq.a0_mapo")] + A0Mapo, + #[cbor(rename = "hw.pwr.pmbus.alert")] + PmbusAlert, + + // + // Initialization failures + // + #[cbor(rename = "hw.cpu.a0_fail.unknown")] + UnrecognizedCPU, +} + +pub(crate) type Ereport = task_packrat_api::Ereport; + +#[derive(microcbor::EncodeFields)] +pub(crate) struct UnrecognizedCPU { + #[cbor(flatten)] + refdes: &'static HostCpuRefdes, + coretype0: bool, + coretype1: bool, + coretype2: bool, + sp5r1: bool, + sp5r2: bool, + sp5r3: bool, + sp5r4: bool, +} + +#[derive(microcbor::EncodeFields)] +struct HostCpuRefdes { + refdes: FixedStr<2>, + dev_id: FixedStr<16>, +} + +#[derive(microcbor::EncodeFields)] +struct SeqFpgaRefdes { + refdes: FixedStr<3>, +} + +static SEQ_FPGA_REFDES: SeqFpgaRefdes = SeqFpgaRefdes { + refdes: FixedStr::from_str("U27"), +}; + 
+static HOST_CPU_REFDES: HostCpuRefdes = HostCpuRefdes { + refdes: FixedStr::from_str("P0"), + // TODO(eliza): can we get this from the `gateway-sp-messages` crate? + dev_id: FixedStr::from_str("sp5-host-cpu"), +}; + +pub(crate) fn deliver_ereport>( + ereport: &Ereport, + packrat: &Packrat, + buf: &mut [u8], +) { + match packrat.encode_ereport(ereport, buf) { + Ok(len) => ringbuf_entry!(Trace::EreportSent(ereport.class, len)), + Err(task_packrat_api::EreportEncodeError::Packrat { len, err }) => { + ringbuf_entry!(Trace::EreportLost(ereport.class, len, err)) + } + Err(task_packrat_api::EreportEncodeError::Encoder(_)) => { + ringbuf_entry!(Trace::EreportTooBig(ereport.class)) + } + } +} +//////////////////////////////////////////////////////////////////////////////// + mod idl { use drv_cpu_seq_api::StateChangeReason; include!(concat!(env!("OUT_DIR"), "/server_stub.rs")); diff --git a/drv/cosmo-seq-server/src/vcore.rs b/drv/cosmo-seq-server/src/vcore.rs index cd9824c30..ab2d1c820 100644 --- a/drv/cosmo-seq-server/src/vcore.rs +++ b/drv/cosmo-seq-server/src/vcore.rs @@ -13,10 +13,11 @@ //! use super::i2c_config; +use super::Ereport; use drv_i2c_api::ResponseCode; use drv_i2c_devices::raa229620a::{self, Raa229620A}; +use fixedstr::FixedStr; use ringbuf::*; -use serde::Serialize; use userlib::{sys_get_timer, units, TaskId}; pub(super) struct VCore { @@ -24,13 +25,13 @@ pub(super) struct VCore { vddcr_cpu0: Raa229620A, /// `PWR_CONT2`: This regulator controls `VDDCR_CPU1` and `VDDIO_SP5` rails. 
vddcr_cpu1: Raa229620A, - packrat: task_packrat_api::Packrat, } -#[derive(Copy, Clone, PartialEq, Serialize)] -#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +#[derive(Copy, Clone, PartialEq, microcbor::Encode)] enum Rail { + #[cbor(rename = "VDDCR_CPU0")] VddcrCpu0, + #[cbor(rename = "VDDCR_CPU1")] VddcrCpu1, } @@ -71,9 +72,6 @@ enum Trace { StatusCml(Rail, Result), StatusMfrSpecific(Rail, Result), I2cError(Rail, PmbusCmd, raa229620a::Error), - EreportSent(Rail, usize), - EreportLost(Rail, usize, task_packrat_api::EreportWriteError), - EreportTooBig(Rail), } #[derive(Copy, Clone, PartialEq)] @@ -104,7 +102,7 @@ const VCORE_UV_WARN_LIMIT: units::Volts = units::Volts(11.75); const VCORE_NSAMPLES: usize = 25; impl VCore { - pub fn new(i2c: TaskId, packrat: task_packrat_api::Packrat) -> Self { + pub fn new(i2c: TaskId) -> Self { let (device, rail) = i2c_config::pmbus::vddcr_cpu0_a0(i2c); let vddcr_cpu0 = Raa229620A::new(&device, rail); @@ -113,7 +111,6 @@ impl VCore { Self { vddcr_cpu0, vddcr_cpu1, - packrat, } } @@ -165,6 +162,7 @@ impl VCore { &self, mut rails: Rails, now: u64, + packrat: &task_packrat_api::Packrat, ereport_buf: &mut [u8], ) { ringbuf_entry!(Trace::PmbusAlert { @@ -176,6 +174,7 @@ impl VCore { now, Rail::VddcrCpu0, rails.vddcr_cpu0, + packrat, ereport_buf, ); rails.vddcr_cpu0 |= cpu0_state.faulted; @@ -184,6 +183,7 @@ impl VCore { now, Rail::VddcrCpu1, rails.vddcr_cpu1, + packrat, ereport_buf, ); rails.vddcr_cpu1 |= cpu1_state.faulted; @@ -258,6 +258,7 @@ impl VCore { now: u64, rail: Rail, alerted: bool, + packrat: &task_packrat_api::Packrat, ereport_buf: &mut [u8], ) -> RegulatorState { use pmbus::commands::raa229620a::STATUS_WORD; @@ -377,26 +378,18 @@ impl VCore { }; let ereport = Ereport { - k: "hw.pwr.pmbus.alert", - v: 0, - rail, - refdes: device.i2c_device().component_id(), - time: now, - pmbus_status, - pwr_good: power_good, + class: crate::EreportClass::PmbusAlert, + version: 0, + report: PmbusEreport { + rail, + refdes: 
FixedStr::from_str(device.i2c_device().component_id()), + time: now, + pmbus_status, + pwr_good: power_good, + }, }; - match self.packrat.serialize_ereport(&ereport, ereport_buf) { - Ok(len) => ringbuf_entry!(Trace::EreportSent(rail, len)), - Err(task_packrat_api::EreportSerializeError::Packrat { - len, - err, - }) => { - ringbuf_entry!(Trace::EreportLost(rail, len, err)) - } - Err(task_packrat_api::EreportSerializeError::Serialize(_)) => { - ringbuf_entry!(Trace::EreportTooBig(rail)) - } - } + + crate::deliver_ereport(&ereport, packrat, ereport_buf); // TODO(eliza): if POWER_GOOD has been deasserted, we should produce a // subsequent ereport for that. @@ -407,18 +400,16 @@ impl VCore { } } -#[derive(Serialize)] -struct Ereport { - k: &'static str, - v: usize, - refdes: &'static str, +#[derive(microcbor::EncodeFields)] +pub(crate) struct PmbusEreport { + refdes: FixedStr<{ crate::i2c_config::MAX_COMPONENT_ID_LEN }>, rail: Rail, time: u64, pwr_good: Option, pmbus_status: PmbusStatus, } -#[derive(Copy, Clone, Default, Serialize)] +#[derive(Copy, Clone, Default, microcbor::Encode)] struct PmbusStatus { word: Option, input: Option, diff --git a/drv/gimlet-seq-server/Cargo.toml b/drv/gimlet-seq-server/Cargo.toml index bef21f575..758813ce9 100644 --- a/drv/gimlet-seq-server/Cargo.toml +++ b/drv/gimlet-seq-server/Cargo.toml @@ -33,7 +33,6 @@ zerocopy-derive = { workspace = true } num-derive = { workspace = true } static_assertions = { workspace = true } spd = { workspace = true } -serde = { workspace = true } pmbus = { workspace = true } [build-dependencies] diff --git a/drv/gimlet-seq-server/src/main.rs b/drv/gimlet-seq-server/src/main.rs index a603ddaa1..3dfff115e 100644 --- a/drv/gimlet-seq-server/src/main.rs +++ b/drv/gimlet-seq-server/src/main.rs @@ -25,6 +25,7 @@ use drv_ice40_spi_program as ice40; use drv_packrat_vpd_loader::{read_vpd_and_load_packrat, Packrat}; use drv_spi_api::{SpiDevice, SpiServer}; use drv_stm32xx_sys_api as sys_api; +use fixedstr::FixedStr; 
use idol_runtime::{NotificationHandler, RequestError}; use seq_spi::{Addr, Reg}; use static_assertions::const_assert; @@ -84,7 +85,7 @@ enum Trace { A2Status(u8), A2, A0FailureDetails(Addr, u8), - A0Failed(#[count(children)] SeqError), + A0Failed(#[count(children)] A0Failure), A1Status(Result), A1Readbacks(u8), A1OutStatus(u8), @@ -154,6 +155,13 @@ enum Trace { retries_remaining: u8, }, StartFailed(#[count(children)] SeqError), + EreportSent(#[count(children)] EreportClass, usize), + EreportLost( + #[count(children)] EreportClass, + usize, + task_packrat_api::EreportWriteError, + ), + EreportTooBig(#[count(children)] EreportClass), } counted_ringbuf!(Trace, 128, Trace::None); @@ -202,42 +210,41 @@ struct ServerImpl { hf: hf_api::HostFlash, vcore: vcore::VCore, deadline: u64, + packrat: Packrat, // Buffer for encoding ereports. This is a static so that it's not on the // stack when handling interrupts. ereport_buf: &'static mut [u8; EREPORT_BUF_LEN], } const TIMER_INTERVAL: u32 = 10; -const EREPORT_BUF_LEN: usize = microcbor::max_cbor_len_for!( - task_packrat_api::Ereport -); +const EREPORT_BUF_LEN: usize = microcbor::max_cbor_len_for![ + Ereport, + Ereport, + Ereport<&'static HostCpuRefdes>, +]; -#[derive(microcbor::Encode)] +type Ereport = task_packrat_api::Ereport; + +#[derive(Copy, Clone, Eq, PartialEq, microcbor::Encode, counters::Count)] pub enum EreportClass { + #[cbor(rename = "hw.cpu.thermtrip")] + Thermtrip, #[cbor(rename = "hw.pwr.pmbus.alert")] PmbusAlert, -} -#[derive(microcbor::EncodeFields)] -pub(crate) enum EreportKind { - PmbusAlert { - refdes: fixedstr::FixedStr<{ crate::i2c_config::MAX_COMPONENT_ID_LEN }>, - rail: &'static fixedstr::FixedStr<10>, - time: u64, - pwr_good: Option, - pmbus_status: PmbusStatus, - }, -} - -#[derive(Copy, Clone, Default, microcbor::Encode)] -pub(crate) struct PmbusStatus { - word: Option, - input: Option, - iout: Option, - vout: Option, - temp: Option, - cml: Option, - mfr: Option, + #[cbor(rename = 
"hw.cpu.a0_fail.unknown")] + UnrecognizedCPU, + #[cbor(rename = "hw.cpu.a0_fail.no_cpu")] + NoCPUPresent, + + #[cbor(rename = "hw.a0_fail.timeout.a1")] + A1Timeout, + #[cbor(rename = "hw.a0_fail.timeout.a0")] + A0Timeout, + #[cbor(rename = "hw.a0_fail.timeout.groupc")] + A0TimeoutGroupC, + #[cbor(rename = "hw.pwr.pmbus.a0_fail.i2c_err")] + I2cFault, } impl ServerImpl { @@ -532,8 +539,9 @@ impl ServerImpl { jefe, hf, deadline: 0, - vcore: vcore::VCore::new(sys, packrat, &device, rail), + vcore: vcore::VCore::new(sys, &device, rail), ereport_buf, + packrat, }; // Power on, unless suppressed by the `stay-in-a2` feature @@ -573,7 +581,8 @@ impl NotificationHandler for ServerImpl { fn handle_notification(&mut self, bits: userlib::NotificationBits) { if bits.check_notification_mask(self.vcore.mask()) { - self.vcore.handle_notification(self.ereport_buf); + self.vcore + .handle_notification(&self.packrat, self.ereport_buf); } if !bits.has_timer_fired(notifications::TIMER_MASK) { @@ -819,7 +828,7 @@ impl ServerImpl { } if sys_get_timer().now > deadline { - return Err(self.a0_failure(SeqError::A1Timeout)); + return Err(self.a0_failure(A0Failure::A1Timeout)); } hl::sleep_for(1); @@ -833,7 +842,7 @@ impl ServerImpl { ringbuf_entry!(Trace::CPUPresent(present)); if !present { - return Err(self.a0_failure(SeqError::CPUNotPresent)); + return Err(self.a0_failure(A0Failure::NoCPUPresent)); } let coretype = sys.gpio_read(CORETYPE) != 0; @@ -853,7 +862,13 @@ impl ServerImpl { // to be low (VSS on Type-0/Type-1/Type-2). 
// if !coretype || !sp3r1 || sp3r2 { - return Err(self.a0_failure(SeqError::UnrecognizedCPU)); + return Err(self.a0_failure(A0Failure::UnrecognizedCPU( + Coretype { + coretype, + sp3r1, + sp3r2, + }, + ))); } // @@ -877,7 +892,7 @@ impl ServerImpl { } if sys_get_timer().now > deadline { - return Err(self.a0_failure(SeqError::A0TimeoutGroupC)); + return Err(self.a0_failure(A0Failure::A0TimeoutGroupC)); } hl::sleep_for(1); @@ -886,10 +901,10 @@ impl ServerImpl { // // And power up! // - if vcore_soc_on().is_err() { + if let Err(err) = vcore_soc_on() { // Uh-oh, the I2C write failed a bunch of times. Guess I'll // die! - return Err(self.a0_failure(SeqError::I2cFault)); + return Err(self.a0_failure(err)); } ringbuf_entry!(Trace::RailsOn); @@ -910,7 +925,7 @@ impl ServerImpl { } if sys_get_timer().now > deadline { - return Err(self.a0_failure(SeqError::A0Timeout)); + return Err(self.a0_failure(A0Failure::A0Timeout)); } hl::sleep_for(1); @@ -1004,12 +1019,11 @@ impl ServerImpl { } } - fn a0_failure(&mut self, err: SeqError) -> SeqError { + fn a0_failure(&mut self, err: A0Failure) -> SeqError { let record_reg = |addr| { - ringbuf_entry!(Trace::A0FailureDetails( - addr, - self.seq.read_byte(addr).unwrap_lite(), - )); + let byte = self.seq.read_byte(addr).unwrap_lite(); + ringbuf_entry!(Trace::A0FailureDetails(addr, byte)); + byte }; // @@ -1017,13 +1031,15 @@ impl ServerImpl { // buffer to allow this to be debugged. 
// ringbuf_entry!(Trace::A0Failed(err)); - record_reg(Addr::IFR); - record_reg(Addr::DBG_MAX_A0SMSTATUS); - record_reg(Addr::MAX_GROUPB_PG); - record_reg(Addr::MAX_GROUPC_PG); - record_reg(Addr::FLT_A0_SMSTATUS); - record_reg(Addr::FLT_GROUPB_PG); - record_reg(Addr::FLT_GROUPC_PG); + let seq_status = SeqStatus { + ifr: record_reg(Addr::IFR), + dbg_max_a0smstatus: record_reg(Addr::DBG_MAX_A0SMSTATUS), + max_groupb_pg: record_reg(Addr::MAX_GROUPB_PG), + max_groupc_pg: record_reg(Addr::MAX_GROUPC_PG), + flt_a0_smstatus: record_reg(Addr::FLT_A0_SMSTATUS), + flt_groupb_pg: record_reg(Addr::FLT_GROUPB_PG), + flt_groupc_pg: record_reg(Addr::FLT_GROUPC_PG), + }; // // Now put ourselves back in A2. @@ -1037,6 +1053,88 @@ impl ServerImpl { let _ = vcore_soc_off(); _ = self.hf.set_mux(hf_api::HfMuxState::SP); + let (ereport, err) = match err { + A0Failure::A1Timeout => ( + Ereport { + class: EreportClass::A1Timeout, + version: 0, + report: A0FailureEreport { + seq_status, + details: None, + }, + }, + SeqError::A1Timeout, + ), + A0Failure::A0Timeout => ( + Ereport { + class: EreportClass::A0Timeout, + version: 0, + report: A0FailureEreport { + seq_status, + details: None, + }, + }, + SeqError::A0Timeout, + ), + A0Failure::A0TimeoutGroupC => ( + Ereport { + class: EreportClass::A0TimeoutGroupC, + version: 0, + report: A0FailureEreport { + seq_status, + details: None, + }, + }, + SeqError::A0TimeoutGroupC, + ), + A0Failure::UnrecognizedCPU(coretype) => ( + Ereport { + class: EreportClass::UnrecognizedCPU, + version: 0, + report: A0FailureEreport { + seq_status, + details: Some(A0FailureDetails::UnrecognizedCPU { + coretype, + refdes: &HOST_CPU_REFDES, + }), + }, + }, + SeqError::UnrecognizedCPU, + ), + A0Failure::NoCPUPresent => ( + Ereport { + class: EreportClass::NoCPUPresent, + version: 0, + report: A0FailureEreport { + seq_status, + details: Some(A0FailureDetails::NoCPUPresent { + refdes: &HOST_CPU_REFDES, + }), + }, + }, + SeqError::CPUNotPresent, + ), + A0Failure::I2cFault {
refdes, rail, err } => ( + Ereport { + class: EreportClass::I2cFault, + version: 0, + report: A0FailureEreport { + seq_status, + details: Some(A0FailureDetails::I2cErr { + i2c_err: err.into(), + // refdes is guaranteed to be <= + // MAX_COMPONENT_ID_LEN, so this will never panic. + refdes: FixedStr::from_str(refdes), + rail: FixedStr::try_from_str(rail).ok(), + }), + }, + }, + SeqError::I2cFault, + ), + }; + + deliver_ereport(&ereport, &self.packrat, self.ereport_buf); + err } @@ -1051,6 +1149,15 @@ impl ServerImpl { if ifr & thermtrip != 0 { self.seq.clear_bytes(Addr::IFR, &[thermtrip]).unwrap_lite(); self.update_state_internal(PowerState::A0Thermtrip); + deliver_ereport( + &Ereport { + class: EreportClass::Thermtrip, + version: 0, + report: &HOST_CPU_REFDES, + }, + &self.packrat, + self.ereport_buf, + ); } } @@ -1341,6 +1448,95 @@ fn read_spd_data_and_load_packrat( Ok(()) } +#[derive(Copy, Clone, Count, Eq, PartialEq)] +enum A0Failure { + UnrecognizedCPU(Coretype), + NoCPUPresent, + A1Timeout, + A0Timeout, + A0TimeoutGroupC, + I2cFault { + refdes: &'static str, + rail: &'static str, + #[count(children)] + err: i2c::ResponseCode, + }, +} + +#[derive(microcbor::EncodeFields)] +struct A0FailureEreport { + #[cbor(flatten)] + details: Option, + seq_status: SeqStatus, +} + +#[derive(microcbor::EncodeFields)] +enum A0FailureDetails { + UnrecognizedCPU { + #[cbor(flatten)] + refdes: &'static HostCpuRefdes, + #[cbor(flatten)] + coretype: Coretype, + }, + NoCPUPresent { + #[cbor(flatten)] + refdes: &'static HostCpuRefdes, + }, + I2cErr { + refdes: FixedStr<{ i2c_config::MAX_COMPONENT_ID_LEN }>, + // TODO(eliza): max rail len... 
+ #[cbor(skip_if_nil)] + rail: Option>, + i2c_err: u32, + }, +} + +#[derive(microcbor::Encode)] +struct SeqStatus { + ifr: u8, + dbg_max_a0smstatus: u8, + max_groupb_pg: u8, + max_groupc_pg: u8, + flt_a0_smstatus: u8, + flt_groupb_pg: u8, + flt_groupc_pg: u8, +} + +#[derive(Copy, Clone, Eq, PartialEq, microcbor::EncodeFields)] +struct Coretype { + coretype: bool, + sp3r1: bool, + sp3r2: bool, +} + +#[derive(microcbor::EncodeFields)] +struct HostCpuRefdes { + refdes: FixedStr<2>, + dev_id: FixedStr<16>, +} + +static HOST_CPU_REFDES: HostCpuRefdes = HostCpuRefdes { + refdes: FixedStr::from_str("P0"), + // TODO(eliza): can we get this from the `gateway-sp-messages` crate? + dev_id: FixedStr::from_str("sp3-host-cpu"), +}; + +pub(crate) fn deliver_ereport>( + ereport: &Ereport, + packrat: &Packrat, + buf: &mut [u8], +) { + match packrat.encode_ereport(ereport, buf) { + Ok(len) => ringbuf_entry!(Trace::EreportSent(ereport.class, len)), + Err(task_packrat_api::EreportEncodeError::Packrat { len, err }) => { + ringbuf_entry!(Trace::EreportLost(ereport.class, len, err)) + } + Err(task_packrat_api::EreportEncodeError::Encoder(_)) => { + ringbuf_entry!(Trace::EreportTooBig(ereport.class)) + } + } +} + fn reprogram_fpga( spi: &SpiDevice, sys: &sys_api::Sys, @@ -1490,7 +1686,7 @@ cfg_if::cfg_if! { Ok(()) } - fn vcore_soc_on() -> Result<(), i2c::ResponseCode> { + fn vcore_soc_on() -> Result<(), A0Failure> { use drv_i2c_devices::raa229618::Raa229618; let i2c = I2C.get_task_id(); @@ -1500,8 +1696,18 @@ cfg_if::cfg_if! 
{ let (device, rail) = i2c_config::pmbus::vddcr_soc(i2c); let mut vddcr_soc = Raa229618::new(&device, rail); - retry_i2c_txn(I2cTxn::VCoreOn, || vdd_vcore.turn_on())?; - retry_i2c_txn(I2cTxn::SocOn, || vddcr_soc.turn_on())?; + retry_i2c_txn(I2cTxn::VCoreOn, || vdd_vcore.turn_on()) + .map_err(|err| A0Failure::I2cFault { + refdes: vdd_vcore.i2c_device().component_id(), + rail: "VDD_VCORE", + err, + })?; + retry_i2c_txn(I2cTxn::SocOn, || vddcr_soc.turn_on()) + .map_err(|err| A0Failure::I2cFault { + refdes: vddcr_soc.i2c_device().component_id(), + rail: "VDDCR_SOC", + err, + })?; Ok(()) } diff --git a/drv/gimlet-seq-server/src/vcore.rs b/drv/gimlet-seq-server/src/vcore.rs index 011ef40c4..d42c9e4df 100644 --- a/drv/gimlet-seq-server/src/vcore.rs +++ b/drv/gimlet-seq-server/src/vcore.rs @@ -40,7 +40,26 @@ use userlib::{sys_get_timer, units}; pub struct VCore { device: Raa229618, sys: sys_api::Sys, - packrat: packrat_api::Packrat, +} + +#[derive(microcbor::EncodeFields)] +pub(super) struct PmbusEreport { + refdes: FixedStr<{ crate::i2c_config::MAX_COMPONENT_ID_LEN }>, + rail: &'static FixedStr<10>, + time: u64, + pwr_good: Option, + pmbus_status: PmbusStatus, +} + +#[derive(Copy, Clone, Default, microcbor::Encode)] +struct PmbusStatus { + word: Option, + input: Option, + iout: Option, + vout: Option, + temp: Option, + cml: Option, + mfr: Option, } #[derive(Copy, Clone, PartialEq)] @@ -61,9 +80,6 @@ enum Trace { StatusMfrSpecific(Result), Reading { timestamp: u64, volts: units::Volts }, Error(ResponseCode), - EreportSent(usize), - EreportLost(usize, packrat_api::EreportWriteError), - EreportTooBig, } ringbuf!(Trace, 120, Trace::None); @@ -97,16 +113,10 @@ cfg_if::cfg_if! 
{ } impl VCore { - pub fn new( - sys: &sys_api::Sys, - packrat: packrat_api::Packrat, - device: &I2cDevice, - rail: u8, - ) -> Self { + pub fn new(sys: &sys_api::Sys, device: &I2cDevice, rail: u8) -> Self { Self { device: Raa229618::new(device, rail), sys: sys.clone(), - packrat, } } @@ -141,6 +151,7 @@ impl VCore { pub fn handle_notification( &self, + packrat: &packrat_api::Packrat, ereport_buf: &mut [u8; crate::EREPORT_BUF_LEN], ) { let now = sys_get_timer().now; @@ -152,7 +163,7 @@ impl VCore { }); if asserted { - self.read_pmbus_status(now, ereport_buf); + self.read_pmbus_status(now, packrat, ereport_buf); // Clear the fault now so that PMALERT_L is reasserted if a // subsequent fault occurs. Note that if the fault *condition* // continues, the fault bits in the status registers will remain @@ -168,6 +179,7 @@ impl VCore { fn read_pmbus_status( &self, now: u64, + packrat: &packrat_api::Packrat, ereport_buf: &mut [u8; crate::EREPORT_BUF_LEN], ) { use pmbus::commands::raa229618::STATUS_WORD; @@ -264,7 +276,7 @@ impl VCore { .map(|s| s.0); ringbuf_entry!(Trace::StatusMfrSpecific(status_mfr_specific)); - let status = super::PmbusStatus { + let status = PmbusStatus { word: status_word.map(|s| s.0).ok(), input: status_input.ok(), vout: status_vout.ok(), @@ -275,10 +287,10 @@ impl VCore { }; static RAIL: FixedStr<10> = FixedStr::from_str("VDD_VCORE"); - let ereport = packrat_api::Ereport { + let ereport = crate::Ereport { class: crate::EreportClass::PmbusAlert, version: 0, - report: crate::EreportKind::PmbusAlert { + report: PmbusEreport { refdes: FixedStr::from_str( self.device.i2c_device().component_id(), ), @@ -288,15 +300,8 @@ impl VCore { pmbus_status: status, }, }; - match self.packrat.encode_ereport(&ereport, &mut ereport_buf[..]) { - Ok(len) => ringbuf_entry!(Trace::EreportSent(len)), - Err(task_packrat_api::EreportEncodeError::Packrat { len, err }) => { - ringbuf_entry!(Trace::EreportLost(len, err)) - } - Err(task_packrat_api::EreportEncodeError::Encoder(_)) 
=> { - ringbuf_entry!(Trace::EreportTooBig) - } - } + crate::deliver_ereport(&ereport, packrat, ereport_buf); + // TODO(eliza): if POWER_GOOD has been deasserted, we should produce a // subsequent ereport for that.