Skip to content

Commit 7173109

Browse files
oshpigelman authored and ogabbay committed
habanalabs: add "in device creation" status
On init, the disabled state is cleared right before hw_init, and that causes the device to report an "Operational" state before the device initialization is finished. Although the char device is not yet exposed to the user at this stage, the sysfs entries are exposed. This can cause errors in monitoring applications that use the sysfs entries. In order to avoid this, a new state "in device creation" is introduced, to be reported when the device is not disabled but is still in the init flow. Signed-off-by: Omer Shpigelman <[email protected]> Reviewed-by: Oded Gabbay <[email protected]> Signed-off-by: Oded Gabbay <[email protected]>
1 parent e1b61f8 commit 7173109

File tree

5 files changed

+20
-17
lines changed

5 files changed

+20
-17
lines changed

drivers/misc/habanalabs/common/device.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
2323
status = HL_DEVICE_STATUS_NEEDS_RESET;
2424
else if (hdev->disabled)
2525
status = HL_DEVICE_STATUS_MALFUNCTION;
26+
else if (!hdev->init_done)
27+
status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
2628
else
2729
status = HL_DEVICE_STATUS_OPERATIONAL;
2830

@@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev,
4446
case HL_DEVICE_STATUS_NEEDS_RESET:
4547
return false;
4648
case HL_DEVICE_STATUS_OPERATIONAL:
49+
case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
4750
default:
4851
return true;
4952
}

drivers/misc/habanalabs/common/habanalabs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1990,7 +1990,7 @@ struct hl_state_dump_specs {
19901990

19911991
#define HL_STR_MAX 32
19921992

1993-
#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
1993+
#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
19941994

19951995
/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
19961996
* x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.

drivers/misc/habanalabs/common/habanalabs_drv.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,12 +317,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
317317
hdev->asic_prop.fw_security_enabled = false;
318318

319319
/* Assign status description string */
320-
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
321-
"disabled", HL_STR_MAX);
320+
strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL],
321+
"operational", HL_STR_MAX);
322322
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
323323
"in reset", HL_STR_MAX);
324+
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
325+
"disabled", HL_STR_MAX);
324326
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
325327
"needs reset", HL_STR_MAX);
328+
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
329+
"in device creation", HL_STR_MAX);
326330

327331
hdev->major = hl_major;
328332
hdev->reset_on_lockup = reset_on_lockup;

drivers/misc/habanalabs/common/sysfs.c

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99

1010
#include <linux/pci.h>
1111

12-
long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
13-
bool curr)
12+
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
1413
{
1514
struct cpucp_packet pkt;
1615
u32 used_pll_idx;
@@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
4443
return (long) result;
4544
}
4645

47-
void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
48-
u64 freq)
46+
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
4947
{
5048
struct cpucp_packet pkt;
5149
u32 used_pll_idx;
@@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
285283
char *buf)
286284
{
287285
struct hl_device *hdev = dev_get_drvdata(dev);
288-
char *str;
286+
char str[HL_STR_MAX];
289287

290-
if (atomic_read(&hdev->in_reset))
291-
str = "In reset";
292-
else if (hdev->disabled)
293-
str = "Malfunction";
294-
else if (hdev->needs_reset)
295-
str = "Needs Reset";
296-
else
297-
str = "Operational";
288+
strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX);
289+
290+
/* use uppercase for backward compatibility */
291+
str[0] = 'A' + (str[0] - 'a');
298292

299293
return sprintf(buf, "%s\n", str);
300294
}

include/uapi/misc/habanalabs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,9 @@ enum hl_device_status {
276276
HL_DEVICE_STATUS_OPERATIONAL,
277277
HL_DEVICE_STATUS_IN_RESET,
278278
HL_DEVICE_STATUS_MALFUNCTION,
279-
HL_DEVICE_STATUS_NEEDS_RESET
279+
HL_DEVICE_STATUS_NEEDS_RESET,
280+
HL_DEVICE_STATUS_IN_DEVICE_CREATION,
281+
HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
280282
};
281283

282284
enum hl_server_type {

0 commit comments

Comments
 (0)