@@ -3480,6 +3480,48 @@ static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3480
3480
return gaudi2_special_blocks_config (hdev );
3481
3481
}
3482
3482
3483
/*
 * gaudi2_test_queues_msgs_free() - release the per-queue test message buffers.
 * @hdev: habanalabs device; its asic_specific pointer is used as the
 *        struct gaudi2_device that holds the queues_test_info array.
 *
 * Walks queues_test_info[] from index 0 up to GAUDI2_NUM_TESTED_QS and hands
 * every buffer back through hl_asic_dma_pool_free(). Each freed slot has its
 * kern_addr set to NULL, so calling this function again finds a NULL first
 * slot and returns without freeing anything twice.
 */
+ static void gaudi2_test_queues_msgs_free (struct hl_device * hdev )
3484
+ {
3485
+ struct gaudi2_device * gaudi2 = hdev -> asic_specific ;
3486
+ struct gaudi2_queues_test_info * msg_info = gaudi2 -> queues_test_info ;
3487
+ int i ;
3488
+
3489
+ for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i ++ ) {
3490
+ /* bail-out if this is an allocation failure point */
/*
 * The walk stops at the first NULL slot instead of skipping it: slots are
 * filled in ascending order by gaudi2_test_queues_msgs_alloc(), which stops
 * at the first failure, so nothing past a NULL slot is expected to be live.
 */
3491
+ if (!msg_info [i ].kern_addr )
3492
+ break ;
3493
+
3494
+ hl_asic_dma_pool_free (hdev , msg_info [i ].kern_addr , msg_info [i ].dma_addr );
3495
+ msg_info [i ].kern_addr = NULL ;
3496
+ }
3497
+ }
3498
+
3499
/*
 * gaudi2_test_queues_msgs_alloc() - pre-allocate one test packet per tested queue.
 * @hdev: habanalabs device; its asic_specific pointer is used as the
 *        struct gaudi2_device that holds the queues_test_info array.
 *
 * For each of the GAUDI2_NUM_TESTED_QS entries, requests a buffer of
 * sizeof(struct packet_msg_short) from hl_asic_dma_pool_zalloc() and stores
 * the returned kernel address and DMA address in queues_test_info[i].
 *
 * Return: 0 when every buffer was allocated, -ENOMEM on the first failure.
 * On failure the buffers obtained so far are released through
 * gaudi2_test_queues_msgs_free() before returning.
 */
+ static int gaudi2_test_queues_msgs_alloc (struct hl_device * hdev )
3500
+ {
3501
+ struct gaudi2_device * gaudi2 = hdev -> asic_specific ;
3502
+ struct gaudi2_queues_test_info * msg_info = gaudi2 -> queues_test_info ;
3503
+ int i , rc ;
3504
+
3505
+ /* allocate a message-short buf for each Q we intend to test */
3506
+ for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i ++ ) {
3507
+ msg_info [i ].kern_addr =
3508
+ (void * )hl_asic_dma_pool_zalloc (hdev , sizeof (struct packet_msg_short ),
3509
+ GFP_KERNEL , & msg_info [i ].dma_addr );
3510
+ if (!msg_info [i ].kern_addr ) {
3511
+ dev_err (hdev -> dev ,
3512
+ "Failed to allocate dma memory for H/W queue %d testing\n" , i );
3513
+ rc = - ENOMEM ;
3514
+ goto err_exit ;
3515
+ }
3516
+ }
3517
+
3518
+ return 0 ;
3519
+
3520
+ err_exit :
/*
 * Slot i is NULL at this point, which is exactly where the free routine's
 * loop stops, so only slots 0..i-1 are released.
 */
3521
+ gaudi2_test_queues_msgs_free (hdev );
3522
+ return rc ;
3523
+ }
3524
+
3483
3525
static int gaudi2_sw_init (struct hl_device * hdev )
3484
3526
{
3485
3527
struct asic_fixed_properties * prop = & hdev -> asic_prop ;
@@ -3579,8 +3621,14 @@ static int gaudi2_sw_init(struct hl_device *hdev)
3579
3621
if (rc )
3580
3622
goto free_scratchpad_mem ;
3581
3623
3624
+ rc = gaudi2_test_queues_msgs_alloc (hdev );
3625
+ if (rc )
3626
+ goto special_blocks_free ;
3627
+
3582
3628
return 0 ;
3583
3629
3630
+ special_blocks_free :
3631
+ gaudi2_special_blocks_iterator_free (hdev );
3584
3632
free_scratchpad_mem :
3585
3633
hl_asic_dma_pool_free (hdev , gaudi2 -> scratchpad_kernel_address ,
3586
3634
gaudi2 -> scratchpad_bus_address );
@@ -3603,6 +3651,8 @@ static int gaudi2_sw_fini(struct hl_device *hdev)
3603
3651
struct asic_fixed_properties * prop = & hdev -> asic_prop ;
3604
3652
struct gaudi2_device * gaudi2 = hdev -> asic_specific ;
3605
3653
3654
+ gaudi2_test_queues_msgs_free (hdev );
3655
+
3606
3656
gaudi2_special_blocks_iterator_free (hdev );
3607
3657
3608
3658
hl_cpu_accessible_dma_pool_free (hdev , prop -> pmmu .page_size , gaudi2 -> virt_msix_db_cpu_addr );
@@ -6797,28 +6847,29 @@ static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, b
6797
6847
}
6798
6848
}
6799
6849
6800
- static int gaudi2_test_queue (struct hl_device * hdev , u32 hw_queue_id )
6850
/*
 * gaudi2_test_queue_hw_queue_id_to_sob_id() - pick the SOB index used to test a queue.
 * @hdev: habanalabs device; asic_prop.first_available_user_sob[0] is the base index.
 * @hw_queue_id: H/W queue id; its distance from GAUDI2_QUEUE_ID_PDMA_0_0 is
 *               added to the base so that each queue maps to a different index.
 *
 * Return: the SOB index. The callers in this file multiply it by 4 to form an
 * offset from mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0.
 *
 * NOTE(review): no range check is done here; presumably callers only pass ids
 * at or above GAUDI2_QUEUE_ID_PDMA_0_0 - confirm against the callers.
 * The "-" line below is the removed line of the old gaudi2_test_queue(), which
 * used the same base index for every queue.
 */
+ static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id (struct hl_device * hdev , u32 hw_queue_id )
6801
6851
{
6802
- u32 sob_offset = hdev -> asic_prop .first_available_user_sob [0 ] * 4 ;
6852
+ return hdev -> asic_prop .first_available_user_sob [0 ] +
6853
+ hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0 ;
6854
+ }
6855
+
6856
/*
 * gaudi2_test_queue_clear() - zero the SOB register used to test one queue.
 * @hdev: habanalabs device.
 * @hw_queue_id: H/W queue id whose test SOB should be cleared.
 *
 * Computes the register address as mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 plus four
 * times the index returned by gaudi2_test_queue_hw_queue_id_to_sob_id(), then
 * writes 0 to it with WREG32().
 *
 * The "-" lines interleaved below are removed lines of the old
 * gaudi2_test_queue() (local declarations, timeout selection); they are not
 * part of this function.
 */
+ static void gaudi2_test_queue_clear (struct hl_device * hdev , u32 hw_queue_id )
6857
+ {
6858
+ u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id (hdev , hw_queue_id ) * 4 ;
6803
6859
u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset ;
6804
- u32 timeout_usec , tmp , sob_base = 1 , sob_val = 0x5a5a ;
6805
- struct packet_msg_short * msg_short_pkt ;
6806
- dma_addr_t pkt_dma_addr ;
6807
- size_t pkt_size ;
6808
- int rc ;
6809
6860
6810
- if (hdev -> pldm )
6811
- timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC ;
6812
- else
6813
- timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC ;
6861
+ /* Reset the SOB value */
6862
+ WREG32 (sob_addr , 0 );
6863
+ }
6814
6864
6815
- pkt_size = sizeof (* msg_short_pkt );
6816
- msg_short_pkt = hl_asic_dma_pool_zalloc (hdev , pkt_size , GFP_KERNEL , & pkt_dma_addr );
6817
- if (!msg_short_pkt ) {
6818
- dev_err (hdev -> dev , "Failed to allocate packet for H/W queue %d testing\n" ,
6819
- hw_queue_id );
6820
- return - ENOMEM ;
6821
- }
6865
+ static int gaudi2_test_queue_send_msg_short (struct hl_device * hdev , u32 hw_queue_id , u32 sob_val ,
6866
+ struct gaudi2_queues_test_info * msg_info )
6867
+ {
6868
+ u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id (hdev , hw_queue_id ) * 4 ;
6869
+ u32 tmp , sob_base = 1 ;
6870
+ struct packet_msg_short * msg_short_pkt = msg_info -> kern_addr ;
6871
+ size_t pkt_size = sizeof (struct packet_msg_short );
6872
+ int rc ;
6822
6873
6823
6874
tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT ) |
6824
6875
(1 << GAUDI2_PKT_CTL_EB_SHIFT ) |
@@ -6829,15 +6880,25 @@ static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6829
6880
msg_short_pkt -> value = cpu_to_le32 (sob_val );
6830
6881
msg_short_pkt -> ctl = cpu_to_le32 (tmp );
6831
6882
6832
- /* Reset the SOB value */
6833
- WREG32 (sob_addr , 0 );
6883
+ rc = hl_hw_queue_send_cb_no_cmpl (hdev , hw_queue_id , pkt_size , msg_info -> dma_addr );
6884
+ if (rc )
6885
+ dev_err (hdev -> dev ,
6886
+ "Failed to send msg_short packet to H/W queue %d\n" , hw_queue_id );
6834
6887
6835
- rc = hl_hw_queue_send_cb_no_cmpl (hdev , hw_queue_id , pkt_size , pkt_dma_addr );
6836
- if (rc ) {
6837
- dev_err (hdev -> dev , "Failed to send msg_short packet to H/W queue %d\n" ,
6838
- hw_queue_id );
6839
- goto free_pkt ;
6840
- }
6888
+ return rc ;
6889
+ }
6890
+
6891
+ static int gaudi2_test_queue_wait_completion (struct hl_device * hdev , u32 hw_queue_id , u32 sob_val )
6892
+ {
6893
+ u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id (hdev , hw_queue_id ) * 4 ;
6894
+ u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset ;
6895
+ u32 timeout_usec , tmp ;
6896
+ int rc ;
6897
+
6898
+ if (hdev -> pldm )
6899
+ timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC ;
6900
+ else
6901
+ timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC ;
6841
6902
6842
6903
rc = hl_poll_timeout (
6843
6904
hdev ,
@@ -6853,11 +6914,6 @@ static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6853
6914
rc = - EIO ;
6854
6915
}
6855
6916
6856
- /* Reset the SOB value */
6857
- WREG32 (sob_addr , 0 );
6858
-
6859
- free_pkt :
6860
- hl_asic_dma_pool_free (hdev , (void * ) msg_short_pkt , pkt_dma_addr );
6861
6917
return rc ;
6862
6918
}
6863
6919
@@ -6877,30 +6933,44 @@ static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6877
6933
6878
6934
/*
 * gaudi2_test_queues() - test every enabled H/W queue and the CPU queue.
 * @hdev: habanalabs device.
 *
 * After this change the test runs in two passes over the queue ids in
 * [GAUDI2_QUEUE_ID_PDMA_0_0, GAUDI2_QUEUE_ID_CPU_PQ), skipping queues for
 * which gaudi2_is_queue_enabled() is false:
 *
 *   1. Put the queue in test mode, clear its SOB and send a msg_short packet
 *      carrying sob_val (0x5a5a), using that queue's entry in
 *      gaudi2->queues_test_info[] (indexed by id - GAUDI2_QUEUE_ID_PDMA_0_0).
 *   2. After gaudi2_test_cpu_queue() has run, wait for each queue's
 *      completion, then clear its SOB again and take it out of test mode.
 *
 * Return: 0 on success, otherwise the error code of the first step that
 * failed. The removed ("-") lines show the previous behavior: each queue was
 * tested to completion one at a time and any failure was reported as -EINVAL.
 *
 * On any failure the function jumps straight to done, so queues already put
 * in test mode are left in that state.
 */
static int gaudi2_test_queues (struct hl_device * hdev )
6879
6935
{
6880
- int i , rc , ret_val = 0 ;
6936
+ struct gaudi2_device * gaudi2 = hdev -> asic_specific ;
6937
+ struct gaudi2_queues_test_info * msg_info ;
6938
+ u32 sob_val = 0x5a5a ;
6939
+ int i , rc ;
6881
6940
6941
+ /* send test message on all enabled Qs */
6882
6942
for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i ++ ) {
6883
6943
if (!gaudi2_is_queue_enabled (hdev , i ))
6884
6944
continue ;
6885
6945
6946
+ msg_info = & gaudi2 -> queues_test_info [i - GAUDI2_QUEUE_ID_PDMA_0_0 ];
6886
6947
gaudi2_qman_set_test_mode (hdev , i , true);
6887
- rc = gaudi2_test_queue (hdev , i );
6888
- gaudi2_qman_set_test_mode (hdev , i , false);
6889
-
6890
- if (rc ) {
6891
- ret_val = - EINVAL ;
6948
+ gaudi2_test_queue_clear (hdev , i );
6949
+ rc = gaudi2_test_queue_send_msg_short (hdev , i , sob_val , msg_info );
6950
+ if (rc )
6892
6951
goto done ;
6893
- }
6894
6952
}
6895
6953
6896
6954
/* rc is always assigned here, so done: never returns an uninitialized value */
rc = gaudi2_test_cpu_queue (hdev );
6897
- if (rc ) {
6898
- ret_val = - EINVAL ;
6955
+ if (rc )
6899
6956
goto done ;
6957
+
6958
+ /* verify that all messages were processed */
6959
+ for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i ++ ) {
6960
+ if (!gaudi2_is_queue_enabled (hdev , i ))
6961
+ continue ;
6962
+
6963
+ rc = gaudi2_test_queue_wait_completion (hdev , i , sob_val );
6964
+ if (rc )
6965
+ /* chip is not usable, no need for cleanups, just bail-out with error */
6966
+ goto done ;
6967
+
6968
+ gaudi2_test_queue_clear (hdev , i );
6969
+ gaudi2_qman_set_test_mode (hdev , i , false);
6900
6970
}
6901
6971
6902
6972
done :
6903
- return ret_val ;
6973
+ return rc ;
6904
6974
}
6905
6975
6906
6976
static int gaudi2_compute_reset_late_init (struct hl_device * hdev )
0 commit comments