Skip to content

Commit 134d16d

Browse files
John Clements authored and alexdeucher committed
drm/amdgpu: RAS harvest on driver load
In the event of a RAS UE + warm reset, error counters shall be harvested and cleared on driver load. Reviewed-by: Hawking Zhang <[email protected]> Signed-off-by: John Clements <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 340c571 commit 134d16d

File tree

1 file changed

+29
-0
lines changed

1 file changed

+29
-0
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2090,6 +2090,32 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
20902090
return r;
20912091
}
20922092

2093+
static int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
2094+
{
2095+
if (adev->gmc.xgmi.connected_to_cpu)
2096+
return 1;
2097+
return 0;
2098+
}
2099+
2100+
static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
2101+
struct ras_common_if *ras_block)
2102+
{
2103+
struct ras_query_if info = {
2104+
.head = *ras_block,
2105+
};
2106+
2107+
if (!amdgpu_persistent_edc_harvesting_supported(adev))
2108+
return 0;
2109+
2110+
if (amdgpu_ras_query_error_status(adev, &info) != 0)
2111+
DRM_WARN("RAS init harvest failure");
2112+
2113+
if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0)
2114+
DRM_WARN("RAS init harvest reset failure");
2115+
2116+
return 0;
2117+
}
2118+
20932119
/* helper function to handle common stuff in ip late init phase */
20942120
int amdgpu_ras_late_init(struct amdgpu_device *adev,
20952121
struct ras_common_if *ras_block,
@@ -2119,6 +2145,9 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
21192145
return r;
21202146
}
21212147

2148+
/* check for errors after a warm reset on ASICs that support persistent EDC */
2149+
amdgpu_persistent_edc_harvesting(adev, ras_block);
2150+
21222151
/* in resume phase, no need to create ras fs node */
21232152
if (adev->in_suspend || amdgpu_in_reset(adev))
21242153
return 0;

0 commit comments

Comments
 (0)