Skip to content

Commit a86e0c0

Browse files
Lijo Lazar and alexdeucher
authored and committed
drm/amdgpu: Add init level for post reset reinit
When a device needs to be reset before initialization, not all IPs are required to be initialized before the reset. In such cases, the driver needs to identify whether an IP/feature is being initialized for the first time or reinitialized after a reset. Add a RESET_RECOVERY init level to identify the post-reset reinitialization phase. This only provides device-level identification; IPs/features may also choose to track their state independently. Signed-off-by: Lijo Lazar <[email protected]> Acked-by: Tao Zhou <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 6719ab8 commit a86e0c0

File tree

7 files changed

+38
-3
lines changed

7 files changed

+38
-3
lines changed

drivers/gpu/drm/amd/amdgpu/aldebaran.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
330330
}
331331

332332
list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
333+
amdgpu_set_init_level(tmp_adev,
334+
AMDGPU_INIT_LEVEL_RESET_RECOVERY);
333335
dev_info(tmp_adev->dev,
334336
"GPU reset succeeded, trying to resume\n");
335337
r = aldebaran_mode2_restore_ip(tmp_adev);
@@ -375,6 +377,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
375377
tmp_adev);
376378

377379
if (!r) {
380+
amdgpu_set_init_level(tmp_adev,
381+
AMDGPU_INIT_LEVEL_DEFAULT);
378382
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
379383

380384
r = amdgpu_ib_ring_tests(tmp_adev);

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,7 @@ struct amdgpu_mqd {
839839
enum amdgpu_init_lvl_id {
840840
AMDGPU_INIT_LEVEL_DEFAULT,
841841
AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
842+
AMDGPU_INIT_LEVEL_RESET_RECOVERY,
842843
};
843844

844845
struct amdgpu_init_level {

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,11 @@ struct amdgpu_init_level amdgpu_init_default = {
156156
.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
157157
};
158158

159+
struct amdgpu_init_level amdgpu_init_recovery = {
160+
.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
161+
.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
162+
};
163+
159164
/*
160165
* Minimal blocks needed to be initialized before a XGMI hive can be reset. This
161166
* is used for cases like reset on initialization where the entire hive needs to
@@ -182,6 +187,9 @@ void amdgpu_set_init_level(struct amdgpu_device *adev,
182187
case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
183188
adev->init_lvl = &amdgpu_init_minimal_xgmi;
184189
break;
190+
case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
191+
adev->init_lvl = &amdgpu_init_recovery;
192+
break;
185193
case AMDGPU_INIT_LEVEL_DEFAULT:
186194
fallthrough;
187195
default:
@@ -5419,7 +5427,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
54195427
struct list_head *device_list_handle;
54205428
bool full_reset, vram_lost = false;
54215429
struct amdgpu_device *tmp_adev;
5422-
int r;
5430+
int r, init_level;
54235431

54245432
device_list_handle = reset_context->reset_device_list;
54255433

@@ -5428,10 +5436,18 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
54285436

54295437
full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
54305438

5439+
/*
5440+
* If it's reset on init, it's default init level, otherwise keep level
5441+
* as recovery level.
5442+
*/
5443+
if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5444+
init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5445+
else
5446+
init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5447+
54315448
r = 0;
54325449
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5433-
/* After reset, it's default init level */
5434-
amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
5450+
amdgpu_set_init_level(tmp_adev, init_level);
54355451
if (full_reset) {
54365452
/* post card */
54375453
amdgpu_ras_set_fed(tmp_adev, false);
@@ -5518,6 +5534,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
55185534

55195535
out:
55205536
if (!r) {
5537+
/* IP init is complete now, set level as default */
5538+
amdgpu_set_init_level(tmp_adev,
5539+
AMDGPU_INIT_LEVEL_DEFAULT);
55215540
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
55225541
r = amdgpu_ib_ring_tests(tmp_adev);
55235542
if (r) {

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,3 +342,8 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
342342
strscpy(buf, "unknown", len);
343343
}
344344
}
345+
346+
bool amdgpu_reset_in_recovery(struct amdgpu_device *adev)
347+
{
348+
return (adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY);
349+
}

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,6 @@ extern struct amdgpu_reset_handler xgmi_reset_on_init_handler;
158158
int amdgpu_reset_do_xgmi_reset_on_init(
159159
struct amdgpu_reset_context *reset_context);
160160

161+
bool amdgpu_reset_in_recovery(struct amdgpu_device *adev);
162+
161163
#endif

drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
220220
int r;
221221
struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
222222

223+
amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
223224
dev_info(tmp_adev->dev,
224225
"GPU reset succeeded, trying to resume\n");
225226
r = sienna_cichlid_mode2_restore_ip(tmp_adev);
@@ -237,6 +238,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
237238

238239
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
239240

241+
amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
240242
r = amdgpu_ib_ring_tests(tmp_adev);
241243
if (r) {
242244
dev_err(tmp_adev->dev,

drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
221221
int r;
222222
struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
223223

224+
amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
224225
dev_info(tmp_adev->dev,
225226
"GPU reset succeeded, trying to resume\n");
226227
r = smu_v13_0_10_mode2_restore_ip(tmp_adev);
@@ -234,6 +235,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
234235

235236
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
236237

238+
amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
237239
r = amdgpu_ib_ring_tests(tmp_adev);
238240
if (r) {
239241
dev_err(tmp_adev->dev,

0 commit comments

Comments
 (0)