Skip to content

Commit f9d35b9

Browse files
Xiang Liu authored and alexdeucher committed
drm/amdgpu: Generate bad page threshold cper records
Generate a CPER record when the bad page threshold is exceeded and commit it to the CPER ring. v2: return -ENOMEM instead of false v2: check return value of fill section function Signed-off-by: Xiang Liu <[email protected]> Reviewed-by: Tao Zhou <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 4058e7c commit f9d35b9

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev
207207
NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
208208

209209
amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
210-
CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
210+
CPER_SEV_NUM, RUNTIME, NONSTD_SEC_LEN,
211211
NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
212212

213213
section->hdr.valid_bits.err_info_cnt = 1;
@@ -308,6 +308,28 @@ int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
308308
return 0;
309309
}
310310

311+
int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
312+
{
313+
struct cper_hdr *bp_threshold = NULL;
314+
struct amdgpu_ring *ring = &adev->cper.ring_buf;
315+
int ret;
316+
317+
bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
318+
if (!bp_threshold) {
319+
dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
320+
return -ENOMEM;
321+
}
322+
323+
amdgpu_cper_entry_fill_hdr(adev, bp_threshold, AMDGPU_CPER_TYPE_BP_THRESHOLD, CPER_SEV_NUM);
324+
ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
325+
if (ret)
326+
return ret;
327+
328+
amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
329+
330+
return 0;
331+
}
332+
311333
static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
312334
enum aca_error_type aca_err_type)
313335
{

drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
9595
int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
9696
struct aca_banks *banks,
9797
uint16_t bank_count);
98+
/* The bad page threshold is encoded into a separate CPER entry */
99+
int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev);
98100
void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
99101
void *src, int count);
100102
int amdgpu_cper_init(struct amdgpu_device *adev);

drivers/gpu/drm/amd/pm/amdgpu_dpm.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,9 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
716716
ret = smu_send_rma_reason(smu);
717717
mutex_unlock(&adev->pm.mutex);
718718

719+
if (amdgpu_cper_generate_bp_threshold_record(adev))
720+
dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
721+
719722
return ret;
720723
}
721724

0 commit comments

Comments
 (0)