Skip to content

Commit 3a41d93

Browse files
committed
tenstorrent: bh_chip: add pgood fault interrupt
Adds interrupts on the falling and rising edges of the pgood GPIO to enable the pgood fault sequence. When a pgood fault (falling edge) is detected, the board fault LED is turned on and ASIC reset is asserted. If pgood comes back up, the LED is cleared and the ASIC is brought out of reset. If pgood falls again within 1 second of the previous fall, the pgood severe fault state is entered and can only be cleared with a power cycle.

Signed-off-by: Petra Alexson <[email protected]>
1 parent 9e8c7cc commit 3a41d93

File tree

3 files changed

+93
-15
lines changed

3 files changed

+93
-15
lines changed

app/dmc/src/main.c

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -221,14 +221,6 @@ int main(void)
221221
}
222222
}
223223

224-
ARRAY_FOR_EACH_PTR(BH_CHIPS, chip) {
225-
ret = therm_trip_gpio_setup(chip);
226-
if (ret != 0) {
227-
LOG_ERR("%s() failed: %d", "therm_trip_gpio_setup", ret);
228-
return ret;
229-
}
230-
}
231-
232224
if (IS_ENABLED(CONFIG_TT_FWUPDATE)) {
233225
if (!tt_fwupdate_is_confirmed()) {
234226
if (bist_rc < 0) {
@@ -267,10 +259,24 @@ int main(void)
267259
}
268260
}
269261

270-
if (IS_ENABLED(CONFIG_TT_ASSEMBLY_TEST) && board_fault_led.port != NULL) {
262+
/* Set up GPIOs */
263+
if (board_fault_led.port != NULL) {
271264
gpio_pin_configure_dt(&board_fault_led, GPIO_OUTPUT_INACTIVE);
272265
}
273266

267+
ARRAY_FOR_EACH_PTR(BH_CHIPS, chip) {
268+
ret = therm_trip_gpio_setup(chip);
269+
if (ret != 0) {
270+
LOG_ERR("%s() failed: %d", "therm_trip_gpio_setup", ret);
271+
return ret;
272+
}
273+
ret = pgood_gpio_setup(chip);
274+
if (ret != 0) {
275+
LOG_ERR("%s() failed: %d", "pgood_gpio_setup", ret);
276+
return ret;
277+
}
278+
}
279+
274280
if (IS_ENABLED(CONFIG_JTAG_LOAD_BOOTROM)) {
275281
ARRAY_FOR_EACH_PTR(BH_CHIPS, chip) {
276282
ret = jtag_bootrom_init(chip);
@@ -337,6 +343,33 @@ int main(void)
337343
}
338344
}
339345

346+
/* handler for PGOOD */
347+
ARRAY_FOR_EACH_PTR(BH_CHIPS, chip) {
348+
if (chip->data.pgood_fall_triggered && !chip->data.pgood_severe_fault) {
349+
int64_t current_uptime_ms = k_uptime_get();
350+
/* Assert board fault */
351+
gpio_pin_set_dt(&board_fault_led, 1);
352+
/* Report over SMBus - to add later */
353+
/* Assert ASIC reset */
354+
bh_chip_assert_asic_reset(chip);
355+
/* If pgood went down again within 1 second */
356+
if (chip->data.pgood_last_trip_ms != 0 &&
357+
current_uptime_ms - chip->data.pgood_last_trip_ms < 1000) {
358+
/* Assert more severe fault over IPMI - to add later */
359+
chip->data.pgood_severe_fault = true;
360+
}
361+
chip->data.pgood_last_trip_ms = current_uptime_ms;
362+
chip->data.pgood_fall_triggered = false;
363+
}
364+
if (chip->data.pgood_rise_triggered && !chip->data.pgood_severe_fault) {
365+
/* Follow out of reset procedure */
366+
bh_chip_reset_chip(chip, true);
367+
/* Clear board fault */
368+
gpio_pin_set_dt(&board_fault_led, 0);
369+
chip->data.pgood_rise_triggered = false;
370+
}
371+
}
372+
340373
/* TODO(drosen): Turn this into a task which will re-arm until static data is sent
341374
*/
342375
ARRAY_FOR_EACH_PTR(BH_CHIPS, chip) {

include/tenstorrent/bh_chip.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,20 @@ struct bh_chip_data {
5454
bool trigger_reset;
5555

5656
/* notify the main thread to handle therm trip */
57-
bool therm_trip_triggered;
57+
volatile bool therm_trip_triggered;
58+
59+
/* notify the main thread to handle pgood events */
60+
volatile bool pgood_fall_triggered;
61+
volatile bool pgood_rise_triggered;
62+
bool pgood_severe_fault;
63+
int64_t pgood_last_trip_ms;
5864
};
5965

6066
struct bh_chip {
6167
const struct bh_chip_config config;
6268
struct bh_chip_data data;
6369
struct gpio_callback therm_trip_cb;
70+
struct gpio_callback pgood_cb;
6471
};
6572

6673
#define DT_PHANDLE_OR_CHILD(node_id, name) \
@@ -103,8 +110,8 @@ extern struct bh_chip BH_CHIPS[BH_CHIP_COUNT];
103110
HAS_DT_PHANDLE_OR_CHILD(DT_PHANDLE_BY_IDX(n, prop, idx), strapping), \
104111
(DT_FOREACH_CHILD(DT_PHANDLE_OR_CHILD(DT_PHANDLE_BY_IDX(n, prop, idx), strapping), \
105112
INIT_STRAP)), \
106-
())}, \
107-
}, \
113+
())}, \
114+
}, \
108115
},
109116

110117
#define BH_CHIP_PRIMARY_INDEX DT_PROP(DT_PATH(chips), primary)
@@ -132,6 +139,9 @@ void bh_chip_deassert_spi_reset(const struct bh_chip *chip);
132139
int bh_chip_reset_chip(struct bh_chip *chip, bool force_reset);
133140

134141
int therm_trip_gpio_setup(struct bh_chip *chip);
142+
int pgood_gpio_setup(struct bh_chip *chip);
143+
144+
void pgood_fault_work_handler(struct k_work *work);
135145

136146
#ifdef __cplusplus
137147
}

lib/tenstorrent/bh_chip/bh_chip.c

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ int bh_chip_reset_chip(struct bh_chip *chip, bool force_reset)
113113
void therm_trip_detected(const struct device *dev, struct gpio_callback *cb, uint32_t pins)
114114
{
115115
struct bh_chip *chip = CONTAINER_OF(cb, struct bh_chip, therm_trip_cb);
116+
116117
chip->data.therm_trip_triggered = true;
117118
bh_chip_cancel_bus_transfer_set(chip);
118119
tt_event_post(TT_EVENT_WAKE);
@@ -127,13 +128,47 @@ int therm_trip_gpio_setup(struct bh_chip *chip)
127128
if (ret != 0) {
128129
return ret;
129130
}
131+
gpio_init_callback(&chip->therm_trip_cb, therm_trip_detected,
132+
BIT(chip->config.therm_trip.pin));
133+
ret = gpio_add_callback_dt(&chip->config.therm_trip, &chip->therm_trip_cb);
134+
if (ret != 0) {
135+
return ret;
136+
}
130137
ret = gpio_pin_interrupt_configure_dt(&chip->config.therm_trip, GPIO_INT_EDGE_TO_ACTIVE);
138+
139+
return ret;
140+
}
141+
142+
void pgood_change_detected(const struct device *dev, struct gpio_callback *cb, uint32_t pins)
143+
{
144+
struct bh_chip *chip = CONTAINER_OF(cb, struct bh_chip, pgood_cb);
145+
146+
/* Sample PGOOD to see if it rose or fell */
147+
/* TODO: could setup rising interrupt only after falling triggered */
148+
if (gpio_pin_get_dt(&chip->config.pgood)) {
149+
chip->data.pgood_rise_triggered = true;
150+
} else {
151+
chip->data.pgood_fall_triggered = true;
152+
}
153+
tt_event_post(TT_EVENT_WAKE);
154+
}
155+
156+
int pgood_gpio_setup(struct bh_chip *chip)
157+
{
158+
/* Set up PGOOD interrupt */
159+
int ret;
160+
161+
ret = gpio_pin_configure_dt(&chip->config.pgood, GPIO_INPUT);
131162
if (ret != 0) {
132163
return ret;
133164
}
134-
gpio_init_callback(&chip->therm_trip_cb, therm_trip_detected,
135-
BIT(chip->config.therm_trip.pin));
136-
ret = gpio_add_callback(chip->config.therm_trip.port, &chip->therm_trip_cb);
165+
gpio_init_callback(&chip->pgood_cb, pgood_change_detected, BIT(chip->config.pgood.pin));
166+
ret = gpio_add_callback_dt(&chip->config.pgood, &chip->pgood_cb);
167+
if (ret != 0) {
168+
return ret;
169+
}
170+
171+
ret = gpio_pin_interrupt_configure_dt(&chip->config.pgood, GPIO_INT_EDGE_BOTH);
137172

138173
return ret;
139174
}

0 commit comments

Comments
 (0)