98
98
99
99
#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
100
100
101
/*
 * Engine bits (MME 0, MME 2 and TPC 0 through TPC 7) that the debugfs
 * read32/write32/read64/write64 handlers test against
 * hdev->clock_gating_mask: when HW_CAP_CLK_GATE is set and any of these
 * bits is set in the mask, access to the CFG address range is refused
 * with -EFAULT.
 */
+ #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
102
+ BIT(GAUDI_ENGINE_ID_MME_0) |\
103
+ BIT(GAUDI_ENGINE_ID_MME_2) |\
104
+ GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105
+
101
106
static const char gaudi_irq_name [GAUDI_MSI_ENTRIES ][GAUDI_MAX_STRING_LEN ] = {
102
107
"gaudi cq 0_0" , "gaudi cq 0_1" , "gaudi cq 0_2" , "gaudi cq 0_3" ,
103
108
"gaudi cq 1_0" , "gaudi cq 1_1" , "gaudi cq 1_2" , "gaudi cq 1_3" ,
@@ -106,14 +111,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
106
111
};
107
112
108
113
/*
 * Per-channel DMA engine assignment, indexed by the logical channel
 * (GAUDI_PCI_DMA_x / GAUDI_HBM_DMA_x). gaudi_set_clock_gating() uses an
 * entry both as the bit position tested in hdev->clock_gating_mask and,
 * multiplied by DMA_QMAN_OFFSET, as the offset from the DMA0 QMAN
 * registers.
 *
 * The removed lines use literal indices; the added lines use the
 * GAUDI_ENGINE_ID_DMA_* names instead.
 * NOTE(review): presumably GAUDI_ENGINE_ID_DMA_n == n, so the mapping is
 * unchanged - confirm against the engine ID enum.
 */
static const u8 gaudi_dma_assignment [GAUDI_DMA_MAX ] = {
109
- [GAUDI_PCI_DMA_1 ] = 0 ,
110
- [GAUDI_PCI_DMA_2 ] = 1 ,
111
- [GAUDI_PCI_DMA_3 ] = 5 ,
112
- [GAUDI_HBM_DMA_1 ] = 2 ,
113
- [GAUDI_HBM_DMA_2 ] = 3 ,
114
- [GAUDI_HBM_DMA_3 ] = 4 ,
115
- [GAUDI_HBM_DMA_4 ] = 6 ,
116
- [GAUDI_HBM_DMA_5 ] = 7
114
+ [GAUDI_PCI_DMA_1 ] = GAUDI_ENGINE_ID_DMA_0 ,
115
+ [GAUDI_PCI_DMA_2 ] = GAUDI_ENGINE_ID_DMA_1 ,
116
+ [GAUDI_PCI_DMA_3 ] = GAUDI_ENGINE_ID_DMA_5 ,
117
+ [GAUDI_HBM_DMA_1 ] = GAUDI_ENGINE_ID_DMA_2 ,
118
+ [GAUDI_HBM_DMA_2 ] = GAUDI_ENGINE_ID_DMA_3 ,
119
+ [GAUDI_HBM_DMA_3 ] = GAUDI_ENGINE_ID_DMA_4 ,
120
+ [GAUDI_HBM_DMA_4 ] = GAUDI_ENGINE_ID_DMA_6 ,
121
+ [GAUDI_HBM_DMA_5 ] = GAUDI_ENGINE_ID_DMA_7
117
122
};
118
123
119
124
static const u8 gaudi_cq_assignment [NUMBER_OF_CMPLT_QUEUES ] = {
@@ -1819,7 +1824,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
1819
1824
1820
1825
gaudi_init_rate_limiter (hdev );
1821
1826
1822
- gaudi_disable_clock_gating (hdev );
1827
+ hdev -> asic_funcs -> disable_clock_gating (hdev );
1823
1828
1824
1829
for (tpc_id = 0 , tpc_offset = 0 ;
1825
1830
tpc_id < TPC_NUMBER_OF_ENGINES ;
@@ -2531,46 +2536,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
2531
2536
WREG32 (mmTPC7_CFG_TPC_STALL , 1 << TPC0_CFG_TPC_STALL_V_SHIFT );
2532
2537
}
2533
2538
2534
- static void gaudi_enable_clock_gating (struct hl_device * hdev )
2539
+ static void gaudi_set_clock_gating (struct hl_device * hdev )
2535
2540
{
2536
2541
struct gaudi_device * gaudi = hdev -> asic_specific ;
2537
2542
u32 qman_offset ;
2538
2543
int i ;
2539
2544
2540
- if (!hdev -> clock_gating )
2541
- return ;
2542
-
2543
- if (gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE )
2544
- return ;
2545
-
2546
2545
/* If a debug session is in progress, don't enable the clock gate
2547
2546
* as it may interfere
2548
2547
*/
2549
2548
if (hdev -> in_debug )
2550
2549
return ;
2551
2550
2552
- for (i = 0 , qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i ++ ) {
2551
+ for (i = GAUDI_PCI_DMA_1 , qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i ++ ) {
2552
+ if (!(hdev -> clock_gating_mask &
2553
+ (BIT_ULL (gaudi_dma_assignment [i ]))))
2554
+ continue ;
2555
+
2553
2556
qman_offset = gaudi_dma_assignment [i ] * DMA_QMAN_OFFSET ;
2554
2557
WREG32 (mmDMA0_QM_CGM_CFG1 + qman_offset , QMAN_CGM1_PWR_GATE_EN );
2555
2558
WREG32 (mmDMA0_QM_CGM_CFG + qman_offset ,
2556
2559
QMAN_UPPER_CP_CGM_PWR_GATE_EN );
2557
2560
}
2558
2561
2559
- for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i ++ ) {
2562
+ for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i ++ ) {
2563
+ if (!(hdev -> clock_gating_mask &
2564
+ (BIT_ULL (gaudi_dma_assignment [i ]))))
2565
+ continue ;
2566
+
2560
2567
qman_offset = gaudi_dma_assignment [i ] * DMA_QMAN_OFFSET ;
2561
2568
WREG32 (mmDMA0_QM_CGM_CFG1 + qman_offset , QMAN_CGM1_PWR_GATE_EN );
2562
2569
WREG32 (mmDMA0_QM_CGM_CFG + qman_offset ,
2563
2570
QMAN_COMMON_CP_CGM_PWR_GATE_EN );
2564
2571
}
2565
2572
2566
- WREG32 (mmMME0_QM_CGM_CFG1 , QMAN_CGM1_PWR_GATE_EN );
2567
- WREG32 (mmMME0_QM_CGM_CFG ,
2568
- QMAN_COMMON_CP_CGM_PWR_GATE_EN );
2569
- WREG32 (mmMME2_QM_CGM_CFG1 , QMAN_CGM1_PWR_GATE_EN );
2570
- WREG32 (mmMME2_QM_CGM_CFG ,
2571
- QMAN_COMMON_CP_CGM_PWR_GATE_EN );
2573
+ if (hdev -> clock_gating_mask & (BIT_ULL (GAUDI_ENGINE_ID_MME_0 ))) {
2574
+ WREG32 (mmMME0_QM_CGM_CFG1 , QMAN_CGM1_PWR_GATE_EN );
2575
+ WREG32 (mmMME0_QM_CGM_CFG , QMAN_COMMON_CP_CGM_PWR_GATE_EN );
2576
+ }
2577
+
2578
+ if (hdev -> clock_gating_mask & (BIT_ULL (GAUDI_ENGINE_ID_MME_2 ))) {
2579
+ WREG32 (mmMME2_QM_CGM_CFG1 , QMAN_CGM1_PWR_GATE_EN );
2580
+ WREG32 (mmMME2_QM_CGM_CFG , QMAN_COMMON_CP_CGM_PWR_GATE_EN );
2581
+ }
2572
2582
2573
2583
for (i = 0 , qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i ++ ) {
2584
+ if (!(hdev -> clock_gating_mask &
2585
+ (BIT_ULL (GAUDI_ENGINE_ID_TPC_0 + i ))))
2586
+ continue ;
2587
+
2574
2588
WREG32 (mmTPC0_QM_CGM_CFG1 + qman_offset ,
2575
2589
QMAN_CGM1_PWR_GATE_EN );
2576
2590
WREG32 (mmTPC0_QM_CGM_CFG + qman_offset ,
@@ -2663,7 +2677,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2663
2677
gaudi_stop_hbm_dma_qmans (hdev );
2664
2678
gaudi_stop_pci_dma_qmans (hdev );
2665
2679
2666
- gaudi_disable_clock_gating (hdev );
2680
+ hdev -> asic_funcs -> disable_clock_gating (hdev );
2667
2681
2668
2682
msleep (wait_timeout_ms );
2669
2683
@@ -3003,7 +3017,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
3003
3017
3004
3018
gaudi_init_tpc_qmans (hdev );
3005
3019
3006
- gaudi_enable_clock_gating (hdev );
3020
+ hdev -> asic_funcs -> set_clock_gating (hdev );
3007
3021
3008
3022
gaudi_enable_timestamp (hdev );
3009
3023
@@ -3112,7 +3126,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3112
3126
HW_CAP_HBM_DMA | HW_CAP_PLL |
3113
3127
HW_CAP_MMU |
3114
3128
HW_CAP_SRAM_SCRAMBLER |
3115
- HW_CAP_HBM_SCRAMBLER );
3129
+ HW_CAP_HBM_SCRAMBLER |
3130
+ HW_CAP_CLK_GATE );
3131
+
3116
3132
memset (gaudi -> events_stat , 0 , sizeof (gaudi -> events_stat ));
3117
3133
}
3118
3134
@@ -4526,13 +4542,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4526
4542
int rc = 0 ;
4527
4543
4528
4544
if ((addr >= CFG_BASE ) && (addr < CFG_BASE + CFG_SIZE )) {
4529
- if (gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE ) {
4545
+
4546
+ if ((gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE ) &&
4547
+ (hdev -> clock_gating_mask &
4548
+ GAUDI_CLK_GATE_DEBUGFS_MASK )) {
4549
+
4530
4550
dev_err_ratelimited (hdev -> dev ,
4531
4551
"Can't read register - clock gating is enabled!\n" );
4532
4552
rc = - EFAULT ;
4533
4553
} else {
4534
4554
* val = RREG32 (addr - CFG_BASE );
4535
4555
}
4556
+
4536
4557
} else if ((addr >= SRAM_BASE_ADDR ) &&
4537
4558
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE )) {
4538
4559
* val = readl (hdev -> pcie_bar [SRAM_BAR_ID ] +
@@ -4568,13 +4589,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4568
4589
int rc = 0 ;
4569
4590
4570
4591
if ((addr >= CFG_BASE ) && (addr < CFG_BASE + CFG_SIZE )) {
4571
- if (gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE ) {
4592
+
4593
+ if ((gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE ) &&
4594
+ (hdev -> clock_gating_mask &
4595
+ GAUDI_CLK_GATE_DEBUGFS_MASK )) {
4596
+
4572
4597
dev_err_ratelimited (hdev -> dev ,
4573
4598
"Can't write register - clock gating is enabled!\n" );
4574
4599
rc = - EFAULT ;
4575
4600
} else {
4576
4601
WREG32 (addr - CFG_BASE , val );
4577
4602
}
4603
+
4578
4604
} else if ((addr >= SRAM_BASE_ADDR ) &&
4579
4605
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE )) {
4580
4606
writel (val , hdev -> pcie_bar [SRAM_BAR_ID ] +
@@ -4610,7 +4636,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4610
4636
int rc = 0 ;
4611
4637
4612
4638
if ((addr >= CFG_BASE ) && (addr <= CFG_BASE + CFG_SIZE - sizeof (u64 ))) {
4613
- if (gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE ) {
4639
+
4640
+ if ((gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE ) &&
4641
+ (hdev -> clock_gating_mask &
4642
+ GAUDI_CLK_GATE_DEBUGFS_MASK )) {
4643
+
4614
4644
dev_err_ratelimited (hdev -> dev ,
4615
4645
"Can't read register - clock gating is enabled!\n" );
4616
4646
rc = - EFAULT ;
@@ -4620,6 +4650,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4620
4650
4621
4651
* val = (((u64 ) val_h ) << 32 ) | val_l ;
4622
4652
}
4653
+
4623
4654
} else if ((addr >= SRAM_BASE_ADDR ) &&
4624
4655
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof (u64 ))) {
4625
4656
* val = readq (hdev -> pcie_bar [SRAM_BAR_ID ] +
@@ -4656,7 +4687,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4656
4687
int rc = 0 ;
4657
4688
4658
4689
if ((addr >= CFG_BASE ) && (addr <= CFG_BASE + CFG_SIZE - sizeof (u64 ))) {
4659
- if (gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE ) {
4690
+
4691
+ if ((gaudi -> hw_cap_initialized & HW_CAP_CLK_GATE ) &&
4692
+ (hdev -> clock_gating_mask &
4693
+ GAUDI_CLK_GATE_DEBUGFS_MASK )) {
4694
+
4660
4695
dev_err_ratelimited (hdev -> dev ,
4661
4696
"Can't write register - clock gating is enabled!\n" );
4662
4697
rc = - EFAULT ;
@@ -4665,6 +4700,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4665
4700
WREG32 (addr + sizeof (u32 ) - CFG_BASE ,
4666
4701
upper_32_bits (val ));
4667
4702
}
4703
+
4668
4704
} else if ((addr >= SRAM_BASE_ADDR ) &&
4669
4705
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof (u64 ))) {
4670
4706
writeq (val , hdev -> pcie_bar [SRAM_BAR_ID ] +
@@ -4886,7 +4922,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4886
4922
gaudi_mmu_prepare_reg (hdev , mmPSOC_GLOBAL_CONF_TRACE_ARUSER , asid );
4887
4923
gaudi_mmu_prepare_reg (hdev , mmPSOC_GLOBAL_CONF_TRACE_AWUSER , asid );
4888
4924
4889
- hdev -> asic_funcs -> enable_clock_gating (hdev );
4925
+ hdev -> asic_funcs -> set_clock_gating (hdev );
4890
4926
4891
4927
mutex_unlock (& gaudi -> clk_gate_mutex );
4892
4928
}
@@ -5267,7 +5303,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
5267
5303
}
5268
5304
5269
5305
if (disable_clock_gating ) {
5270
- hdev -> asic_funcs -> enable_clock_gating (hdev );
5306
+ hdev -> asic_funcs -> set_clock_gating (hdev );
5271
5307
mutex_unlock (& gaudi -> clk_gate_mutex );
5272
5308
}
5273
5309
}
@@ -5754,7 +5790,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5754
5790
/* Clear interrupts */
5755
5791
WREG32 (mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset , 0 );
5756
5792
5757
- hdev -> asic_funcs -> enable_clock_gating (hdev );
5793
+ hdev -> asic_funcs -> set_clock_gating (hdev );
5758
5794
5759
5795
mutex_unlock (& gaudi -> clk_gate_mutex );
5760
5796
@@ -6270,7 +6306,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
6270
6306
if (s )
6271
6307
seq_puts (s , "\n" );
6272
6308
6273
- hdev -> asic_funcs -> enable_clock_gating (hdev );
6309
+ hdev -> asic_funcs -> set_clock_gating (hdev );
6274
6310
6275
6311
mutex_unlock (& gaudi -> clk_gate_mutex );
6276
6312
@@ -6371,7 +6407,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6371
6407
dev_err (hdev -> dev ,
6372
6408
"Timeout while waiting for TPC%d icache prefetch\n" ,
6373
6409
tpc_id );
6374
- hdev -> asic_funcs -> enable_clock_gating (hdev );
6410
+ hdev -> asic_funcs -> set_clock_gating (hdev );
6375
6411
mutex_unlock (& gaudi -> clk_gate_mutex );
6376
6412
return - EIO ;
6377
6413
}
@@ -6400,7 +6436,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6400
6436
1000 ,
6401
6437
kernel_timeout );
6402
6438
6403
- hdev -> asic_funcs -> enable_clock_gating (hdev );
6439
+ hdev -> asic_funcs -> set_clock_gating (hdev );
6404
6440
mutex_unlock (& gaudi -> clk_gate_mutex );
6405
6441
6406
6442
if (rc ) {
@@ -6741,7 +6777,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
6741
6777
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache ,
6742
6778
.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range ,
6743
6779
.send_heartbeat = gaudi_send_heartbeat ,
6744
- .enable_clock_gating = gaudi_enable_clock_gating ,
6780
+ .set_clock_gating = gaudi_set_clock_gating ,
6745
6781
.disable_clock_gating = gaudi_disable_clock_gating ,
6746
6782
.debug_coresight = gaudi_debug_coresight ,
6747
6783
.is_device_idle = gaudi_is_device_idle ,
0 commit comments