Skip to content

Commit c538991

Browse files
candicelicy authored and alexdeucher committed
drm/amd/pm: Enable ecc_info table support for smu v13_0_10
Support EccInfoTable which includes umc ras error count and error address. Signed-off-by: Candice Li <[email protected]> Reviewed-by: Evan Quan <[email protected]> Reviewed-by: Stanley.Yang <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 06630fb commit c538991

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "asic_reg/mp/mp_13_0_0_sh_mask.h"
4747
#include "smu_cmn.h"
4848
#include "amdgpu_ras.h"
49+
#include "umc_v8_10.h"
4950

5051
/*
5152
* DO NOT use these for err/warn/info/debug messages.
@@ -90,6 +91,12 @@
9091

9192
#define DEBUGSMC_MSG_Mode1Reset 2
9293

94+
/*
95+
* SMU_v13_0_10 supports ECCTABLE since version 80.34.0,
96+
* use this to check whether the ECCTABLE feature is supported
97+
*/
98+
#define SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200
99+
93100
static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = {
94101
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
95102
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1),
@@ -229,6 +236,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
229236
TAB_MAP(ACTIVITY_MONITOR_COEFF),
230237
[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
231238
TAB_MAP(I2C_COMMANDS),
239+
TAB_MAP(ECCINFO),
232240
};
233241

234242
static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -462,6 +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
462470
AMDGPU_GEM_DOMAIN_VRAM);
463471
SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
464472
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
473+
SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
474+
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
465475

466476
smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
467477
if (!smu_table->metrics_table)
@@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
477487
if (!smu_table->watermarks_table)
478488
goto err2_out;
479489

490+
smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL);
491+
if (!smu_table->ecc_table)
492+
goto err3_out;
493+
480494
return 0;
481495

496+
err3_out:
497+
kfree(smu_table->watermarks_table);
482498
err2_out:
483499
kfree(smu_table->gpu_metrics_table);
484500
err1_out:
@@ -2036,6 +2052,64 @@ static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu,
20362052
return ret;
20372053
}
20382054

2055+
static int smu_v13_0_0_check_ecc_table_support(struct smu_context *smu)
2056+
{
2057+
struct amdgpu_device *adev = smu->adev;
2058+
uint32_t if_version = 0xff, smu_version = 0xff;
2059+
int ret = 0;
2060+
2061+
ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
2062+
if (ret)
2063+
return -EOPNOTSUPP;
2064+
2065+
if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) &&
2066+
(smu_version >= SUPPORT_ECCTABLE_SMU_13_0_10_VERSION))
2067+
return ret;
2068+
else
2069+
return -EOPNOTSUPP;
2070+
}
2071+
2072+
static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
2073+
void *table)
2074+
{
2075+
struct smu_table_context *smu_table = &smu->smu_table;
2076+
struct amdgpu_device *adev = smu->adev;
2077+
EccInfoTable_t *ecc_table = NULL;
2078+
struct ecc_info_per_ch *ecc_info_per_channel = NULL;
2079+
int i, ret = 0;
2080+
struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
2081+
2082+
ret = smu_v13_0_0_check_ecc_table_support(smu);
2083+
if (ret)
2084+
return ret;
2085+
2086+
ret = smu_cmn_update_table(smu,
2087+
SMU_TABLE_ECCINFO,
2088+
0,
2089+
smu_table->ecc_table,
2090+
false);
2091+
if (ret) {
2092+
dev_info(adev->dev, "Failed to export SMU ecc table!\n");
2093+
return ret;
2094+
}
2095+
2096+
ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
2097+
2098+
for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) {
2099+
ecc_info_per_channel = &(eccinfo->ecc[i]);
2100+
ecc_info_per_channel->ce_count_lo_chip =
2101+
ecc_table->EccInfo[i].ce_count_lo_chip;
2102+
ecc_info_per_channel->ce_count_hi_chip =
2103+
ecc_table->EccInfo[i].ce_count_hi_chip;
2104+
ecc_info_per_channel->mca_umc_status =
2105+
ecc_table->EccInfo[i].mca_umc_status;
2106+
ecc_info_per_channel->mca_umc_addr =
2107+
ecc_table->EccInfo[i].mca_umc_addr;
2108+
}
2109+
2110+
return ret;
2111+
}
2112+
20392113
static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
20402114
.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
20412115
.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -2111,6 +2185,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
21112185
.send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num,
21122186
.send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag,
21132187
.gpo_control = smu_v13_0_gpo_control,
2188+
.get_ecc_info = smu_v13_0_0_get_ecc_info,
21142189
};
21152190

21162191
void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)

0 commit comments

Comments
 (0)