Skip to content

Commit 366ade2

Browse files
committed
tenstorrent: bh_arc: add support for receiving board input power
Add support for receiving board input power from the STM32. Add input power to the telemetry table. Signed-off-by: Petra Alexson <[email protected]>
1 parent 48011a2 commit 366ade2

File tree

6 files changed

+309
-3
lines changed

6 files changed

+309
-3
lines changed

lib/tenstorrent/bh_arc/cm2bm_msg.c

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
/*
2+
* Copyright (c) 2024 Tenstorrent AI ULC
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
/**
8+
* @file cm2bm_msg.c
9+
* @brief CMFW to BMFW message handling
10+
*
11+
*/
12+
13+
#include <string.h>
14+
#include <zephyr/kernel.h>
15+
#include <tenstorrent/msg_type.h>
16+
#include <tenstorrent/msgqueue.h>
17+
18+
#include "cm2bm_msg.h"
19+
#include "asic_state.h"
20+
#include "fan_ctrl.h"
21+
#include "telemetry.h"
22+
23+
typedef struct {
24+
uint8_t curr_msg_valid;
25+
uint8_t next_seq_num;
26+
Cm2BmSmbusReqMsg curr_msg;
27+
} Cm2BmMsgState;
28+
29+
static Cm2BmMsgState cm2bm_msg_state;
30+
static bool bmfw_ping_valid;
31+
static int32_t current;
32+
static uint32_t power;
33+
K_MSGQ_DEFINE(cm2bm_msg_q, sizeof(Cm2BmMsg), 4, _Alignof(Cm2BmMsg));
34+
35+
int32_t EnqueueCm2BmMsg(const Cm2BmMsg *msg)
36+
{
37+
/* May be called from ISR context, so keep this function ISR-safe */
38+
return k_msgq_put(&cm2bm_msg_q, msg, K_NO_WAIT);
39+
}
40+
41+
int32_t Cm2BmMsgReqSmbusHandler(uint8_t *data, uint8_t size)
42+
{
43+
BUILD_ASSERT(sizeof(cm2bm_msg_state.curr_msg) == 6,
44+
"Unexpected size of cm2bm_msg_state.curr_msg");
45+
if (size != sizeof(cm2bm_msg_state.curr_msg)) {
46+
return -1;
47+
}
48+
49+
if (!cm2bm_msg_state.curr_msg_valid) {
50+
/* See if there is a message in the queue */
51+
Cm2BmMsg msg;
52+
53+
if (k_msgq_get(&cm2bm_msg_q, &msg, K_NO_WAIT) != 0) {
54+
/* Send the all zero message if the message queue is empty */
55+
memset(data, 0, sizeof(cm2bm_msg_state.curr_msg));
56+
return 0;
57+
}
58+
59+
/* If there is a valid message, copy it over to the current message */
60+
cm2bm_msg_state.curr_msg_valid = 1;
61+
cm2bm_msg_state.curr_msg.msg_id = msg.msg_id;
62+
cm2bm_msg_state.curr_msg.seq_num = cm2bm_msg_state.next_seq_num++;
63+
cm2bm_msg_state.curr_msg.data = msg.data;
64+
}
65+
memcpy(data, &cm2bm_msg_state.curr_msg, sizeof(cm2bm_msg_state.curr_msg));
66+
return 0;
67+
}
68+
69+
int32_t Cm2BmMsgAckSmbusHandler(const uint8_t *data, uint8_t size)
70+
{
71+
BUILD_ASSERT(sizeof(Cm2BmSmbusAckMsg) == 2, "Unexpected size of Cm2BmSmbusAckMsg");
72+
if (size != sizeof(Cm2BmSmbusAckMsg)) {
73+
return -1;
74+
}
75+
76+
Cm2BmSmbusAckMsg *ack = (Cm2BmSmbusAckMsg *)data;
77+
78+
if (cm2bm_msg_state.curr_msg_valid && ack->msg_id == cm2bm_msg_state.curr_msg.msg_id &&
79+
ack->seq_num == cm2bm_msg_state.curr_msg.seq_num) {
80+
/* Message handled when msg_id and seq_num match the current valid message */
81+
cm2bm_msg_state.curr_msg_valid = 0;
82+
return 0;
83+
} else {
84+
return -1;
85+
}
86+
}
87+
88+
void IssueChipReset(uint32_t reset_level)
89+
{
90+
lock_down_for_reset();
91+
92+
/* Send a reset request to the BMFW */
93+
Cm2BmMsg msg = {
94+
.msg_id = kCm2BmMsgIdResetReq,
95+
.data = reset_level,
96+
};
97+
EnqueueCm2BmMsg(&msg);
98+
}
99+
100+
/**
 * @brief Request a level-0 chip reset, optionally masking an IRQ first.
 *
 * @param arg When non-NULL, an IRQ number encoded as a pointer value; that IRQ
 *            is disabled before the reset is issued.
 */
void ChipResetRequest(void *arg)
{
	if (arg != NULL) {
		/* Mask the IRQ so it cannot keep interrupting us */
		irq_disable((uint32_t)POINTER_TO_UINT(arg));
	}

	IssueChipReset(0);
}
110+
111+
void UpdateFanSpeedRequest(uint32_t fan_speed)
112+
{
113+
Cm2BmMsg msg = {
114+
.msg_id = kCm2BmMsgIdFanSpeedUpdate,
115+
.data = fan_speed,
116+
};
117+
EnqueueCm2BmMsg(&msg);
118+
}
119+
120+
/* Report the current message and automatically acknowledge it. */
121+
int32_t ResetBoardByte(uint8_t *data, uint8_t size)
122+
{
123+
memset(data, 0, size);
124+
125+
if (!cm2bm_msg_state.curr_msg_valid) {
126+
/* See if there is a message in the queue */
127+
Cm2BmMsg msg;
128+
129+
if (k_msgq_get(&cm2bm_msg_q, &msg, K_NO_WAIT) != 0) {
130+
/* Send the all zero message if the message queue is empty */
131+
*data = 0;
132+
return 0;
133+
}
134+
135+
/* If there is a valid message, copy it over to the current message */
136+
cm2bm_msg_state.curr_msg_valid = 1;
137+
cm2bm_msg_state.curr_msg.msg_id = msg.msg_id;
138+
cm2bm_msg_state.curr_msg.seq_num = cm2bm_msg_state.next_seq_num++;
139+
cm2bm_msg_state.curr_msg.data = msg.data;
140+
}
141+
*data = cm2bm_msg_state.curr_msg.msg_id;
142+
143+
/* Because there's no acknowledgment coming, remove the message. */
144+
cm2bm_msg_state.curr_msg_valid = 0;
145+
146+
return 0;
147+
}
148+
149+
static uint8_t reset_bm_handler(uint32_t msg_code, const struct request *request,
150+
struct response *response)
151+
{
152+
uint8_t arg = request->data[1];
153+
154+
/* Don't expect a response from the bmfw so need to check here for a valid reset level */
155+
uint8_t ret = 0;
156+
157+
switch (arg) {
158+
case 0:
159+
case 3:
160+
IssueChipReset(arg);
161+
break;
162+
default:
163+
/* Can never be zero because that case is covered by asic reset */
164+
ret = arg;
165+
}
166+
167+
return ret;
168+
}
169+
170+
REGISTER_MESSAGE(MSG_TYPE_TRIGGER_RESET, reset_bm_handler);
171+
172+
static uint8_t ping_bm_handler(uint32_t msg_code, const struct request *request,
173+
struct response *response)
174+
{
175+
/* Send a ping to the bmfw */
176+
Cm2BmMsg msg = {
177+
.msg_id = kCm2BmMsgIdPing,
178+
};
179+
180+
bmfw_ping_valid = false;
181+
EnqueueCm2BmMsg(&msg);
182+
/* Delay to allow BMFW to respond */
183+
k_msleep(50);
184+
185+
/* Encode response from BMFW */
186+
response->data[1] = bmfw_ping_valid;
187+
return 0;
188+
}
189+
190+
REGISTER_MESSAGE(MSG_TYPE_PING_BM, ping_bm_handler);
191+
192+
int32_t Bm2CmSendDataHandler(const uint8_t *data, uint8_t size)
193+
{
194+
#ifndef CONFIG_TT_SMC_RECOVERY
195+
if (size != sizeof(bmStaticInfo)) {
196+
return -1;
197+
}
198+
199+
bmStaticInfo *info = (bmStaticInfo *)data;
200+
201+
if (info->version != 0) {
202+
UpdateBmFwVersion(info->bl_version, info->app_version);
203+
return 0;
204+
}
205+
#endif
206+
207+
return -1;
208+
}
209+
210+
int32_t Bm2CmPingHandler(const uint8_t *data, uint8_t size)
211+
{
212+
if (size != 2) {
213+
return -1;
214+
}
215+
216+
uint16_t response = *(uint16_t *)data;
217+
218+
if (response != 0xA5A5) {
219+
bmfw_ping_valid = false;
220+
return -1;
221+
}
222+
bmfw_ping_valid = true;
223+
return 0;
224+
}
225+
226+
int32_t Bm2CmSendCurrentHandler(const uint8_t *data, uint8_t size)
227+
{
228+
if (size != 4) {
229+
return -1;
230+
}
231+
232+
current = UNALIGNED_GET((uint32_t *)data);
233+
234+
return 0;
235+
}
236+
237+
int32_t Bm2CmSendPwrHandler(const uint8_t *data, uint8_t size)
238+
{
239+
if (size != 4) {
240+
return -1;
241+
}
242+
243+
power = UNALIGNED_GET((uint32_t *)data);
244+
245+
return 0;
246+
}
247+
248+
249+
/* TODO: Put these somewhere else? */
250+
int32_t GetInputCurrent(void)
251+
{
252+
return current;
253+
}
254+
255+
uint32_t GetInputPower(void)
256+
{
257+
return power;
258+
}
259+
260+
/**
 * @brief Receive a fan RPM report and pass it on to SetFanRPM().
 *
 * The RPM is a 16-bit value in native byte order. Builds with
 * CONFIG_TT_SMC_RECOVERY defined always reject the data.
 *
 * @param data Buffer holding the 2-byte RPM value.
 * @param size Size of @p data; must be 2.
 * @return 0 when the RPM was forwarded, -1 on size mismatch or in recovery builds.
 */
int32_t Bm2CmSendFanRPMHandler(const uint8_t *data, uint8_t size)
{
#ifdef CONFIG_TT_SMC_RECOVERY
	(void)data;
	(void)size;

	return -1;
#else
	uint16_t rpm;

	if (size != sizeof(rpm)) {
		return -1;
	}

	/*
	 * data is a byte buffer with no alignment guarantee, so copy the value out
	 * instead of dereferencing it through a uint16_t pointer.
	 */
	memcpy(&rpm, data, sizeof(rpm));
	SetFanRPM(rpm);

	return 0;
#endif
}

lib/tenstorrent/bh_arc/cm2dm_msg.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ typedef struct {
2929
static Cm2DmMsgState cm2dm_msg_state;
3030
static bool dmfw_ping_valid;
3131
static int32_t current;
32+
static uint32_t power;
3233
K_MSGQ_DEFINE(cm2dm_msg_q, sizeof(Cm2DmMsg), 4, _Alignof(Cm2DmMsg));
3334

3435
int32_t EnqueueCm2DmMsg(const Cm2DmMsg *msg)
@@ -241,12 +242,28 @@ int32_t Dm2CmSendCurrentHandler(const uint8_t *data, uint8_t size)
241242
return 0;
242243
}
243244

245+
int32_t Dm2CmSendPwrHandler(const uint8_t *data, uint8_t size)
246+
{
247+
if (size != 4) {
248+
return -1;
249+
}
250+
251+
power = *(uint32_t *)data;
252+
253+
return 0;
254+
}
255+
244256
/* TODO: Put these somewhere else? */
245257
int32_t GetInputCurrent(void)
246258
{
247259
return current;
248260
}
249261

262+
uint32_t GetInputPower(void)
263+
{
264+
return power;
265+
}
266+
250267
int32_t Dm2CmSendFanRPMHandler(const uint8_t *data, uint8_t size)
251268
{
252269
#ifndef CONFIG_TT_SMC_RECOVERY

lib/tenstorrent/bh_arc/cm2dm_msg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,9 @@ typedef struct dmStaticInfo {
5353
int32_t Dm2CmSendDataHandler(const uint8_t *data, uint8_t size);
5454
int32_t Dm2CmPingHandler(const uint8_t *data, uint8_t size);
5555
int32_t Dm2CmSendCurrentHandler(const uint8_t *data, uint8_t size);
56+
int32_t Dm2CmSendPwrHandler(const uint8_t *data, uint8_t size);
5657
int32_t GetInputCurrent(void);
58+
uint32_t GetInputPower(void);
5759
int32_t Dm2CmSendFanRPMHandler(const uint8_t *data, uint8_t size);
5860

5961
#endif

lib/tenstorrent/bh_arc/smbus_target.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,13 @@ static SmbusConfig smbus_config = {
179179
[0x24] = {.valid = 1,
180180
.trans_type = kSmbusTransWriteWord,
181181
.handler = {.rcv_handler = &Dm2CmSetBoardPwrLimit}},
182+
[0x25] = {
183+
.valid = 1,
184+
.trans_type = kSmbusTransBlockWrite,
185+
.expected_blocksize = 4,
186+
.handler = {
187+
.rcv_handler = &Dm2CmSendPwrHandler
188+
}},
182189
#endif
183190
[0xD8] = {.valid = 1,
184191
.trans_type = kSmbusTransReadByte,

lib/tenstorrent/bh_arc/telemetry.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,9 @@ static void update_telemetry(void)
264264
UpdateGddrTelemetry();
265265
telemetry[MAX_GDDR_TEMP] = GetMaxGDDRTemp();
266266
telemetry[INPUT_CURRENT] =
267-
GetInputCurrent(); /* Input current - reported in A in signed int 16.16 format */
267+
GetInputCurrent(); /* Input current - reported in A in signed int 16.16 format */
268+
telemetry[INPUT_POWER] =
269+
GetInputPower(); /* Input power - reported in W in unsigned int 16.16 format */
268270
telemetry[TIMER_HEARTBEAT]++; /* Incremented every time the timer is called */
269271
SetPostCode(POST_CODE_SRC_CMFW, POST_CODE_TELEMETRY_END);
270272
}
@@ -323,7 +325,8 @@ static void update_tag_table(void)
323325
tag_table[49] = (struct telemetry_entry){TAG_MAX_GDDR_TEMP, MAX_GDDR_TEMP};
324326
tag_table[50] = (struct telemetry_entry){TAG_ASIC_LOCATION, ASIC_LOCATION};
325327
tag_table[51] = (struct telemetry_entry){TAG_BOARD_PWR_LIMIT, BOARD_PWR_LIMIT};
326-
tag_table[52] = (struct telemetry_entry){TAG_TELEM_ENUM_COUNT, TELEM_ENUM_COUNT};
328+
tag_table[52] = (struct telemetry_entry){TAG_INPUT_POWER, INPUT_POWER};
329+
tag_table[53] = (struct telemetry_entry){TAG_TELEM_ENUM_COUNT, TELEM_ENUM_COUNT};
327330
}
328331

329332
/* Handler functions for zephyr timer and worker objects */

lib/tenstorrent/bh_arc/telemetry.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
#define TAG_MAX_GDDR_TEMP 51
6969
#define TAG_ASIC_LOCATION 52
7070
#define TAG_BOARD_PWR_LIMIT 53
71+
#define TAG_INPUT_POWER 54
7172

7273
/* Enums are subject to updates */
7374
typedef enum {
@@ -117,11 +118,14 @@ typedef enum {
117118
L2CPU_FW_VERSION,
118119

119120
/* MISC */
121+
TIMER_HEARTBEAT, /* Incremented every time the timer is called */
122+
123+
/* Board telemetry */
120124
FAN_SPEED,
121125
FAN_RPM,
122-
TIMER_HEARTBEAT, /* Incremented every time the timer is called */
123126
INPUT_CURRENT,
124127
BOARD_PWR_LIMIT,
128+
INPUT_POWER,
125129

126130
/* Tile enablement/harvesting information */
127131
ENABLED_TENSIX_COL,

0 commit comments

Comments
 (0)