Skip to content

Commit e38bfd3

Browse files
committed
habanalabs: set clock gating per engine
For debugging purposes, we need to give the root user finer control over the clock gating feature of the DMA and compute engines. Therefore, change the clock gating debugfs interface to be a bitmask instead of true/false. Each bit represents a different engine, according to the gaudi_engine_id enum. See the debugfs documentation for more details. Signed-off-by: Oded Gabbay <[email protected]> Reviewed-by: Omer Shpigelman <[email protected]>
1 parent 2edc66e commit e38bfd3

File tree

7 files changed

+103
-62
lines changed

7 files changed

+103
-62
lines changed

Documentation/ABI/testing/debugfs-driver-habanalabs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,16 @@ Description: Allow the root user to disable/enable in runtime the clock
1616
gating mechanism in Gaudi. Due to how Gaudi is built, the
1717
clock gating needs to be disabled in order to access the
1818
registers of the TPC and MME engines. This is sometimes needed
19-
during debug by the user and hence the user needs this option
19+
during debug by the user and hence the user needs this option.
20+
The user can supply a bitmask value in which each bit
21+
enables/disables the clock gating feature of a different engine.
22+
The bitmask is composed of 20 bits:
23+
0 - 7 : DMA channels
24+
8 - 11 : MME engines
25+
12 - 19 : TPC engines
26+
The bit position of a specific engine can be determined
27+
using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values
28+
are defined in the uapi habanalabs.h file, in enum gaudi_engine_id.
2029

2130
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
2231
Date: Jan 2019

drivers/misc/habanalabs/debugfs.c

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
981981
if (*ppos)
982982
return 0;
983983

984-
sprintf(tmp_buf, "%d\n", hdev->clock_gating);
984+
sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);
985985
rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
986986
strlen(tmp_buf) + 1);
987987

@@ -993,7 +993,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
993993
{
994994
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
995995
struct hl_device *hdev = entry->hdev;
996-
u32 value;
996+
u64 value;
997997
ssize_t rc;
998998

999999
if (atomic_read(&hdev->in_reset)) {
@@ -1002,19 +1002,12 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
10021002
return 0;
10031003
}
10041004

1005-
rc = kstrtouint_from_user(buf, count, 10, &value);
1005+
rc = kstrtoull_from_user(buf, count, 16, &value);
10061006
if (rc)
10071007
return rc;
10081008

1009-
if (value) {
1010-
hdev->clock_gating = 1;
1011-
if (hdev->asic_funcs->enable_clock_gating)
1012-
hdev->asic_funcs->enable_clock_gating(hdev);
1013-
} else {
1014-
if (hdev->asic_funcs->disable_clock_gating)
1015-
hdev->asic_funcs->disable_clock_gating(hdev);
1016-
hdev->clock_gating = 0;
1017-
}
1009+
hdev->clock_gating_mask = value;
1010+
hdev->asic_funcs->set_clock_gating(hdev);
10181011

10191012
return count;
10201013
}

drivers/misc/habanalabs/device.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
608608
hdev->in_debug = 0;
609609

610610
if (!hdev->hard_reset_pending)
611-
hdev->asic_funcs->enable_clock_gating(hdev);
611+
hdev->asic_funcs->set_clock_gating(hdev);
612612

613613
goto out;
614614
}

drivers/misc/habanalabs/gaudi/gaudi.c

Lines changed: 74 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,11 @@
9898

9999
#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
100100

101+
#define GAUDI_CLK_GATE_DEBUGFS_MASK (\
102+
BIT(GAUDI_ENGINE_ID_MME_0) |\
103+
BIT(GAUDI_ENGINE_ID_MME_2) |\
104+
GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105+
101106
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
102107
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
103108
"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -106,14 +111,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
106111
};
107112

108113
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
109-
[GAUDI_PCI_DMA_1] = 0,
110-
[GAUDI_PCI_DMA_2] = 1,
111-
[GAUDI_PCI_DMA_3] = 5,
112-
[GAUDI_HBM_DMA_1] = 2,
113-
[GAUDI_HBM_DMA_2] = 3,
114-
[GAUDI_HBM_DMA_3] = 4,
115-
[GAUDI_HBM_DMA_4] = 6,
116-
[GAUDI_HBM_DMA_5] = 7
114+
[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115+
[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116+
[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117+
[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118+
[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119+
[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120+
[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121+
[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
117122
};
118123

119124
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
@@ -1819,7 +1824,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
18191824

18201825
gaudi_init_rate_limiter(hdev);
18211826

1822-
gaudi_disable_clock_gating(hdev);
1827+
hdev->asic_funcs->disable_clock_gating(hdev);
18231828

18241829
for (tpc_id = 0, tpc_offset = 0;
18251830
tpc_id < TPC_NUMBER_OF_ENGINES;
@@ -2531,46 +2536,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev)
25312536
WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
25322537
}
25332538

2534-
static void gaudi_enable_clock_gating(struct hl_device *hdev)
2539+
static void gaudi_set_clock_gating(struct hl_device *hdev)
25352540
{
25362541
struct gaudi_device *gaudi = hdev->asic_specific;
25372542
u32 qman_offset;
25382543
int i;
25392544

2540-
if (!hdev->clock_gating)
2541-
return;
2542-
2543-
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
2544-
return;
2545-
25462545
/* During a debug session, don't enable the clock gate
25472546
* as it may interfere
25482547
*/
25492548
if (hdev->in_debug)
25502549
return;
25512550

2552-
for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2551+
for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2552+
if (!(hdev->clock_gating_mask &
2553+
(BIT_ULL(gaudi_dma_assignment[i]))))
2554+
continue;
2555+
25532556
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
25542557
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
25552558
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
25562559
QMAN_UPPER_CP_CGM_PWR_GATE_EN);
25572560
}
25582561

2559-
for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2562+
for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2563+
if (!(hdev->clock_gating_mask &
2564+
(BIT_ULL(gaudi_dma_assignment[i]))))
2565+
continue;
2566+
25602567
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
25612568
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
25622569
WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
25632570
QMAN_COMMON_CP_CGM_PWR_GATE_EN);
25642571
}
25652572

2566-
WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2567-
WREG32(mmMME0_QM_CGM_CFG,
2568-
QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2569-
WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2570-
WREG32(mmMME2_QM_CGM_CFG,
2571-
QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2573+
if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) {
2574+
WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2575+
WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2576+
}
2577+
2578+
if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) {
2579+
WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2580+
WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2581+
}
25722582

25732583
for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2584+
if (!(hdev->clock_gating_mask &
2585+
(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))))
2586+
continue;
2587+
25742588
WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
25752589
QMAN_CGM1_PWR_GATE_EN);
25762590
WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
@@ -2663,7 +2677,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
26632677
gaudi_stop_hbm_dma_qmans(hdev);
26642678
gaudi_stop_pci_dma_qmans(hdev);
26652679

2666-
gaudi_disable_clock_gating(hdev);
2680+
hdev->asic_funcs->disable_clock_gating(hdev);
26672681

26682682
msleep(wait_timeout_ms);
26692683

@@ -3003,7 +3017,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
30033017

30043018
gaudi_init_tpc_qmans(hdev);
30053019

3006-
gaudi_enable_clock_gating(hdev);
3020+
hdev->asic_funcs->set_clock_gating(hdev);
30073021

30083022
gaudi_enable_timestamp(hdev);
30093023

@@ -3112,7 +3126,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
31123126
HW_CAP_HBM_DMA | HW_CAP_PLL |
31133127
HW_CAP_MMU |
31143128
HW_CAP_SRAM_SCRAMBLER |
3115-
HW_CAP_HBM_SCRAMBLER);
3129+
HW_CAP_HBM_SCRAMBLER |
3130+
HW_CAP_CLK_GATE);
3131+
31163132
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
31173133
}
31183134

@@ -4526,13 +4542,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
45264542
int rc = 0;
45274543

45284544
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4529-
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4545+
4546+
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4547+
(hdev->clock_gating_mask &
4548+
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4549+
45304550
dev_err_ratelimited(hdev->dev,
45314551
"Can't read register - clock gating is enabled!\n");
45324552
rc = -EFAULT;
45334553
} else {
45344554
*val = RREG32(addr - CFG_BASE);
45354555
}
4556+
45364557
} else if ((addr >= SRAM_BASE_ADDR) &&
45374558
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
45384559
*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4568,13 +4589,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
45684589
int rc = 0;
45694590

45704591
if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4571-
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4592+
4593+
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4594+
(hdev->clock_gating_mask &
4595+
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4596+
45724597
dev_err_ratelimited(hdev->dev,
45734598
"Can't write register - clock gating is enabled!\n");
45744599
rc = -EFAULT;
45754600
} else {
45764601
WREG32(addr - CFG_BASE, val);
45774602
}
4603+
45784604
} else if ((addr >= SRAM_BASE_ADDR) &&
45794605
(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
45804606
writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4610,7 +4636,11 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
46104636
int rc = 0;
46114637

46124638
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4613-
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4639+
4640+
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4641+
(hdev->clock_gating_mask &
4642+
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4643+
46144644
dev_err_ratelimited(hdev->dev,
46154645
"Can't read register - clock gating is enabled!\n");
46164646
rc = -EFAULT;
@@ -4620,6 +4650,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
46204650

46214651
*val = (((u64) val_h) << 32) | val_l;
46224652
}
4653+
46234654
} else if ((addr >= SRAM_BASE_ADDR) &&
46244655
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
46254656
*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4656,7 +4687,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
46564687
int rc = 0;
46574688

46584689
if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4659-
if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4690+
4691+
if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4692+
(hdev->clock_gating_mask &
4693+
GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4694+
46604695
dev_err_ratelimited(hdev->dev,
46614696
"Can't write register - clock gating is enabled!\n");
46624697
rc = -EFAULT;
@@ -4665,6 +4700,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
46654700
WREG32(addr + sizeof(u32) - CFG_BASE,
46664701
upper_32_bits(val));
46674702
}
4703+
46684704
} else if ((addr >= SRAM_BASE_ADDR) &&
46694705
(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
46704706
writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
@@ -4886,7 +4922,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
48864922
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
48874923
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
48884924

4889-
hdev->asic_funcs->enable_clock_gating(hdev);
4925+
hdev->asic_funcs->set_clock_gating(hdev);
48904926

48914927
mutex_unlock(&gaudi->clk_gate_mutex);
48924928
}
@@ -5267,7 +5303,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
52675303
}
52685304

52695305
if (disable_clock_gating) {
5270-
hdev->asic_funcs->enable_clock_gating(hdev);
5306+
hdev->asic_funcs->set_clock_gating(hdev);
52715307
mutex_unlock(&gaudi->clk_gate_mutex);
52725308
}
52735309
}
@@ -5754,7 +5790,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
57545790
/* Clear interrupts */
57555791
WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
57565792

5757-
hdev->asic_funcs->enable_clock_gating(hdev);
5793+
hdev->asic_funcs->set_clock_gating(hdev);
57585794

57595795
mutex_unlock(&gaudi->clk_gate_mutex);
57605796

@@ -6270,7 +6306,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
62706306
if (s)
62716307
seq_puts(s, "\n");
62726308

6273-
hdev->asic_funcs->enable_clock_gating(hdev);
6309+
hdev->asic_funcs->set_clock_gating(hdev);
62746310

62756311
mutex_unlock(&gaudi->clk_gate_mutex);
62766312

@@ -6371,7 +6407,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
63716407
dev_err(hdev->dev,
63726408
"Timeout while waiting for TPC%d icache prefetch\n",
63736409
tpc_id);
6374-
hdev->asic_funcs->enable_clock_gating(hdev);
6410+
hdev->asic_funcs->set_clock_gating(hdev);
63756411
mutex_unlock(&gaudi->clk_gate_mutex);
63766412
return -EIO;
63776413
}
@@ -6400,7 +6436,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
64006436
1000,
64016437
kernel_timeout);
64026438

6403-
hdev->asic_funcs->enable_clock_gating(hdev);
6439+
hdev->asic_funcs->set_clock_gating(hdev);
64046440
mutex_unlock(&gaudi->clk_gate_mutex);
64056441

64066442
if (rc) {
@@ -6741,7 +6777,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
67416777
.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
67426778
.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
67436779
.send_heartbeat = gaudi_send_heartbeat,
6744-
.enable_clock_gating = gaudi_enable_clock_gating,
6780+
.set_clock_gating = gaudi_set_clock_gating,
67456781
.disable_clock_gating = gaudi_disable_clock_gating,
67466782
.debug_coresight = gaudi_debug_coresight,
67476783
.is_device_idle = gaudi_is_device_idle,

drivers/misc/habanalabs/goya/goya.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5028,14 +5028,14 @@ int goya_armcp_info_get(struct hl_device *hdev)
50285028
return 0;
50295029
}
50305030

5031-
static void goya_enable_clock_gating(struct hl_device *hdev)
5031+
static void goya_set_clock_gating(struct hl_device *hdev)
50325032
{
5033-
5033+
/* clock gating not supported in Goya */
50345034
}
50355035

50365036
static void goya_disable_clock_gating(struct hl_device *hdev)
50375037
{
5038-
5038+
/* clock gating not supported in Goya */
50395039
}
50405040

50415041
static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
@@ -5259,7 +5259,7 @@ static const struct hl_asic_funcs goya_funcs = {
52595259
.mmu_invalidate_cache = goya_mmu_invalidate_cache,
52605260
.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
52615261
.send_heartbeat = goya_send_heartbeat,
5262-
.enable_clock_gating = goya_enable_clock_gating,
5262+
.set_clock_gating = goya_set_clock_gating,
52635263
.disable_clock_gating = goya_disable_clock_gating,
52645264
.debug_coresight = goya_debug_coresight,
52655265
.is_device_idle = goya_is_device_idle,

0 commit comments

Comments
 (0)