Skip to content

Commit 31420f9

Browse files
mhaimovski and ogabbay
authored and committed
accel/habanalabs: speedup h/w queues test in Gaudi2
HW queue testing at driver load and after reset takes a substantial amount of time. This commit reduces the queue test time on Gaudi2 devices by running all the tests in parallel instead of one after the other. Time measurements of the test duration show that the new method is almost 100x faster than the serial approach. Signed-off-by: Moti Haimovski <[email protected]> Reviewed-by: Oded Gabbay <[email protected]> Signed-off-by: Oded Gabbay <[email protected]>
1 parent 91204e4 commit 31420f9

File tree

2 files changed

+128
-41
lines changed

2 files changed

+128
-41
lines changed

drivers/accel/habanalabs/gaudi2/gaudi2.c

Lines changed: 111 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3480,6 +3480,48 @@ static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
34803480
return gaudi2_special_blocks_config(hdev);
34813481
}
34823482

3483+
/*
 * gaudi2_test_queues_msgs_free() - release the message buffers used for H/W queues testing.
 * @hdev: pointer to the habanalabs device structure.
 *
 * Walks gaudi2->queues_test_info in index order, returns each entry's buffer to the
 * ASIC DMA pool and clears its kern_addr. The walk stops at the first entry whose
 * kern_addr is NULL, so entries after such a point are not examined.
 */
static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3484+
{
3485+
struct gaudi2_device *gaudi2 = hdev->asic_specific;
3486+
struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3487+
int i;
3488+
3489+
for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3490+
/* bail-out if this is an allocation failure point */
3491+
if (!msg_info[i].kern_addr)
3492+
break;
3493+
3494+
hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3495+
/* mark the entry as released so a repeated call stops here instead of freeing twice */
msg_info[i].kern_addr = NULL;
3496+
}
3497+
}
3498+
3499+
/*
 * gaudi2_test_queues_msgs_alloc() - allocate the message buffers used for H/W queues testing.
 * @hdev: pointer to the habanalabs device structure.
 *
 * For each of the GAUDI2_NUM_TESTED_QS entries in gaudi2->queues_test_info, obtains a
 * buffer of sizeof(struct packet_msg_short) bytes from the ASIC DMA pool and records
 * both its kernel address and its DMA address in the entry.
 *
 * If any allocation fails, an error is logged and gaudi2_test_queues_msgs_free() is
 * called to release the buffers obtained so far.
 *
 * Return: 0 on success, -ENOMEM if an allocation failed.
 */
static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3500+
{
3501+
struct gaudi2_device *gaudi2 = hdev->asic_specific;
3502+
struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3503+
int i, rc;
3504+
3505+
/* allocate a message-short buf for each Q we intend to test */
3506+
for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3507+
msg_info[i].kern_addr =
3508+
(void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3509+
GFP_KERNEL, &msg_info[i].dma_addr);
3510+
if (!msg_info[i].kern_addr) {
3511+
dev_err(hdev->dev,
3512+
"Failed to allocate dma memory for H/W queue %d testing\n", i);
3513+
rc = -ENOMEM;
3514+
goto err_exit;
3515+
}
3516+
}
3517+
3518+
return 0;
3519+
3520+
err_exit:
3521+
/* entry i has a NULL kern_addr at this point, which is where the free routine stops */
gaudi2_test_queues_msgs_free(hdev);
3522+
return rc;
3523+
}
3524+
34833525
static int gaudi2_sw_init(struct hl_device *hdev)
34843526
{
34853527
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -3579,8 +3621,14 @@ static int gaudi2_sw_init(struct hl_device *hdev)
35793621
if (rc)
35803622
goto free_scratchpad_mem;
35813623

3624+
rc = gaudi2_test_queues_msgs_alloc(hdev);
3625+
if (rc)
3626+
goto special_blocks_free;
3627+
35823628
return 0;
35833629

3630+
special_blocks_free:
3631+
gaudi2_special_blocks_iterator_free(hdev);
35843632
free_scratchpad_mem:
35853633
hl_asic_dma_pool_free(hdev, gaudi2->scratchpad_kernel_address,
35863634
gaudi2->scratchpad_bus_address);
@@ -3603,6 +3651,8 @@ static int gaudi2_sw_fini(struct hl_device *hdev)
36033651
struct asic_fixed_properties *prop = &hdev->asic_prop;
36043652
struct gaudi2_device *gaudi2 = hdev->asic_specific;
36053653

3654+
gaudi2_test_queues_msgs_free(hdev);
3655+
36063656
gaudi2_special_blocks_iterator_free(hdev);
36073657

36083658
hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
@@ -6797,28 +6847,29 @@ static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, b
67976847
}
67986848
}
67996849

6800-
static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6850+
static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
68016851
{
6802-
u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6852+
return hdev->asic_prop.first_available_user_sob[0] +
6853+
hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6854+
}
6855+
6856+
static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
6857+
{
6858+
u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
68036859
u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6804-
u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6805-
struct packet_msg_short *msg_short_pkt;
6806-
dma_addr_t pkt_dma_addr;
6807-
size_t pkt_size;
6808-
int rc;
68096860

6810-
if (hdev->pldm)
6811-
timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6812-
else
6813-
timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6861+
/* Reset the SOB value */
6862+
WREG32(sob_addr, 0);
6863+
}
68146864

6815-
pkt_size = sizeof(*msg_short_pkt);
6816-
msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6817-
if (!msg_short_pkt) {
6818-
dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6819-
hw_queue_id);
6820-
return -ENOMEM;
6821-
}
6865+
static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
6866+
struct gaudi2_queues_test_info *msg_info)
6867+
{
6868+
u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6869+
u32 tmp, sob_base = 1;
6870+
struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
6871+
size_t pkt_size = sizeof(struct packet_msg_short);
6872+
int rc;
68226873

68236874
tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
68246875
(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
@@ -6829,15 +6880,25 @@ static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
68296880
msg_short_pkt->value = cpu_to_le32(sob_val);
68306881
msg_short_pkt->ctl = cpu_to_le32(tmp);
68316882

6832-
/* Reset the SOB value */
6833-
WREG32(sob_addr, 0);
6883+
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
6884+
if (rc)
6885+
dev_err(hdev->dev,
6886+
"Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
68346887

6835-
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6836-
if (rc) {
6837-
dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6838-
hw_queue_id);
6839-
goto free_pkt;
6840-
}
6888+
return rc;
6889+
}
6890+
6891+
static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
6892+
{
6893+
u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6894+
u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6895+
u32 timeout_usec, tmp;
6896+
int rc;
6897+
6898+
if (hdev->pldm)
6899+
timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6900+
else
6901+
timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
68416902

68426903
rc = hl_poll_timeout(
68436904
hdev,
@@ -6853,11 +6914,6 @@ static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
68536914
rc = -EIO;
68546915
}
68556916

6856-
/* Reset the SOB value */
6857-
WREG32(sob_addr, 0);
6858-
6859-
free_pkt:
6860-
hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
68616917
return rc;
68626918
}
68636919

@@ -6877,30 +6933,44 @@ static int gaudi2_test_cpu_queue(struct hl_device *hdev)
68776933

68786934
static int gaudi2_test_queues(struct hl_device *hdev)
68796935
{
6880-
int i, rc, ret_val = 0;
6936+
struct gaudi2_device *gaudi2 = hdev->asic_specific;
6937+
struct gaudi2_queues_test_info *msg_info;
6938+
u32 sob_val = 0x5a5a;
6939+
int i, rc;
68816940

6941+
/* send test message on all enabled Qs */
68826942
for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
68836943
if (!gaudi2_is_queue_enabled(hdev, i))
68846944
continue;
68856945

6946+
msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
68866947
gaudi2_qman_set_test_mode(hdev, i, true);
6887-
rc = gaudi2_test_queue(hdev, i);
6888-
gaudi2_qman_set_test_mode(hdev, i, false);
6889-
6890-
if (rc) {
6891-
ret_val = -EINVAL;
6948+
gaudi2_test_queue_clear(hdev, i);
6949+
rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
6950+
if (rc)
68926951
goto done;
6893-
}
68946952
}
68956953

68966954
rc = gaudi2_test_cpu_queue(hdev);
6897-
if (rc) {
6898-
ret_val = -EINVAL;
6955+
if (rc)
68996956
goto done;
6957+
6958+
/* verify that all messages were processed */
6959+
for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6960+
if (!gaudi2_is_queue_enabled(hdev, i))
6961+
continue;
6962+
6963+
rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
6964+
if (rc)
6965+
/* chip is not usable, no need for cleanups, just bail-out with error */
6966+
goto done;
6967+
6968+
gaudi2_test_queue_clear(hdev, i);
6969+
gaudi2_qman_set_test_mode(hdev, i, false);
69006970
}
69016971

69026972
done:
6903-
return ret_val;
6973+
return rc;
69046974
}
69056975

69066976
static int gaudi2_compute_reset_late_init(struct hl_device *hdev)

drivers/accel/habanalabs/gaudi2/gaudi2P.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@
240240
#define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
241241
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
242242

243+
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
244+
243245
#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8
244246

245247
enum gaudi2_reserved_sob_id {
@@ -452,6 +454,17 @@ struct dup_block_ctx {
452454
unsigned int instances;
453455
};
454456

457+
/**
458+
 * struct gaudi2_queues_test_info - Holds the addresses of a message used for testing the
459+
 * device queues.
460+
 * @dma_addr: the address used by the HW for accessing the message.
461+
 * @kern_addr: the address used by the driver for accessing the message.
462+
 */
463+
struct gaudi2_queues_test_info {
464+
dma_addr_t dma_addr;
465+
void *kern_addr;
466+
};
467+
455468
/**
456469
* struct gaudi2_device - ASIC specific manage structure.
457470
* @cpucp_info_get: get information on device from CPU-CP
@@ -510,6 +523,7 @@ struct dup_block_ctx {
510523
* @flush_db_fifo: flag to force flush DB FIFO after a write.
511524
* @hbm_cfg: HBM subsystem settings
512525
* @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock.
526+
* @queues_test_info: information used by the driver when testing the HW queues.
513527
*/
514528
struct gaudi2_device {
515529
int (*cpucp_info_get)(struct hl_device *hdev);
@@ -537,6 +551,9 @@ struct gaudi2_device {
537551
u32 events_stat[GAUDI2_EVENT_SIZE];
538552
u32 events_stat_aggregate[GAUDI2_EVENT_SIZE];
539553
u32 num_of_valid_hw_events;
554+
555+
/* Queue testing */
556+
struct gaudi2_queues_test_info queues_test_info[GAUDI2_NUM_TESTED_QS];
540557
};
541558

542559
/*

0 commit comments

Comments
 (0)