Skip to content

Commit b11f623

Browse files
committed
Merge tag 'misc-habanalabs-fixes-2021-01-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus
Oded writes:

This tag contains the following bug fixes for 5.11-rc5/6:

- Clear the fence field in the PCI counters packet before sending the packet to the F/W. Not clearing it might cause the driver and F/W to get out of sync.
- Fix backward compatibility in the uapi of the IDLE check that is part of the INFO IOCTL.
- Tell the F/W not to access the Host (device outbound) while the driver removes the device. If that happens, the server might crash.

* tag 'misc-habanalabs-fixes-2021-01-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs: disable FW events on device removal
  habanalabs: fix backward compatibility of idle check
  habanalabs: zero pci counters packet before submit to FW
2 parents cb5c681 + 2dc4a6d commit b11f623

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

drivers/misc/habanalabs/common/device.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1487,6 +1487,15 @@ void hl_device_fini(struct hl_device *hdev)
14871487
}
14881488
}
14891489

1490+
/* Disable PCI access from device F/W so it won't send us additional
1491+
* interrupts. We disable MSI/MSI-X at the halt_engines function and we
1492+
* can't have the F/W sending us interrupts after that. We need to
1493+
* disable the access here because if the device is marked as disabled, the
1494+
* message won't be sent. Also, in case of heartbeat, the device CPU is
1495+
* marked as disabled so this message won't be sent.
1496+
*/
1497+
hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1498+
14901499
/* Mark device as disabled */
14911500
hdev->disabled = true;
14921501

drivers/misc/habanalabs/common/firmware_if.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
402402
}
403403
counters->rx_throughput = result;
404404

405+
memset(&pkt, 0, sizeof(pkt));
406+
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
407+
CPUCP_PKT_CTL_OPCODE_SHIFT);
408+
405409
/* Fetch PCI tx counter */
406410
pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
407411
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -414,6 +418,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
414418
counters->tx_throughput = result;
415419

416420
/* Fetch PCI replay counter */
421+
memset(&pkt, 0, sizeof(pkt));
417422
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
418423
CPUCP_PKT_CTL_OPCODE_SHIFT);
419424

drivers/misc/habanalabs/common/habanalabs_ioctl.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
133133

134134
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
135135
&hw_idle.busy_engines_mask_ext, NULL);
136+
hw_idle.busy_engines_mask =
137+
lower_32_bits(hw_idle.busy_engines_mask_ext);
136138

137139
return copy_to_user(out, &hw_idle,
138140
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;

0 commit comments

Comments
 (0)