Skip to content

Commit f046ca4

Browse files
committed
Merge tag 'amd-drm-fixes-6.1-2022-10-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-6.1-2022-10-19:

amdgpu:
- Mode2 reset fixes for Sienna Cichlid
- Revert broken fan speed sensor fix
- SMU 13.x fixes
- GC 11.x fixes
- RAS fixes
- SR-IOV fixes
- Fix BO move breakage on SI
- Misc compiler fixes

Signed-off-by: Dave Airlie <[email protected]>
From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents 8865dd7 + 8273b40 commit f046ca4

34 files changed

+204
-91
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,9 +274,6 @@ extern int amdgpu_vcnfw_log;
274274
#define AMDGPU_RESET_VCE (1 << 13)
275275
#define AMDGPU_RESET_VCE1 (1 << 14)
276276

277-
#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0)
278-
#define AMDGPU_RESET_LEVEL_MODE2 (1 << 1)
279-
280277
/* max cursor sizes (in pixels) */
281278
#define CIK_CURSOR_WIDTH 128
282279
#define CIK_CURSOR_HEIGHT 128
@@ -1065,7 +1062,6 @@ struct amdgpu_device {
10651062

10661063
struct work_struct reset_work;
10671064

1068-
uint32_t amdgpu_reset_level_mask;
10691065
bool job_hang;
10701066
};
10711067

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
134134
reset_context.method = AMD_RESET_METHOD_NONE;
135135
reset_context.reset_req_dev = adev;
136136
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
137-
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
138137

139138
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
140139
}

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
111111

112112
lock_srbm(adev, mec, pipe, 0, 0);
113113

114-
WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
114+
WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
115115
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
116116
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
117117

drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,8 +1954,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
19541954
return PTR_ERR(ent);
19551955
}
19561956

1957-
debugfs_create_u32("amdgpu_reset_level", 0600, root, &adev->amdgpu_reset_level_mask);
1958-
19591957
/* Register debugfs entries for amdgpu_ttm */
19601958
amdgpu_ttm_debugfs_init(adev);
19611959
amdgpu_debugfs_pm_init(adev);

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2928,6 +2928,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
29282928
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
29292929
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
29302930

2931+
/*
2932+
* Per PMFW team's suggestion, driver needs to handle gfxoff
2933+
* and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2934+
* scenario. Add the missing df cstate disablement here.
2935+
*/
2936+
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2937+
dev_warn(adev->dev, "Failed to disallow df cstate");
2938+
29312939
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
29322940
if (!adev->ip_blocks[i].status.valid)
29332941
continue;
@@ -5210,7 +5218,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
52105218

52115219
reset_context->job = job;
52125220
reset_context->hive = hive;
5213-
52145221
/*
52155222
* Build list of devices to reset.
52165223
* In case we are in XGMI hive mode, resort the device list
@@ -5337,11 +5344,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
53375344
amdgpu_ras_resume(adev);
53385345
} else {
53395346
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5340-
if (r && r == -EAGAIN) {
5341-
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags);
5342-
adev->asic_reset_res = 0;
5347+
if (r && r == -EAGAIN)
53435348
goto retry;
5344-
}
53455349

53465350
if (!r && gpu_reset_for_dev_remove)
53475351
goto recover_end;
@@ -5777,7 +5781,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
57775781
reset_context.reset_req_dev = adev;
57785782
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
57795783
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5780-
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
57815784

57825785
adev->no_hw_access = true;
57835786
r = amdgpu_device_pre_asic_reset(adev, &reset_context);

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
7272
reset_context.method = AMD_RESET_METHOD_NONE;
7373
reset_context.reset_req_dev = adev;
7474
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
75-
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
7675

7776
r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
7877
if (r)

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
19501950
reset_context.method = AMD_RESET_METHOD_NONE;
19511951
reset_context.reset_req_dev = adev;
19521952
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
1953-
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
19541953

19551954
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
19561955
}
@@ -2268,6 +2267,25 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
22682267

22692268
static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
22702269
{
2270+
if (amdgpu_sriov_vf(adev)) {
2271+
switch (adev->ip_versions[MP0_HWIP][0]) {
2272+
case IP_VERSION(13, 0, 2):
2273+
return true;
2274+
default:
2275+
return false;
2276+
}
2277+
}
2278+
2279+
if (adev->asic_type == CHIP_IP_DISCOVERY) {
2280+
switch (adev->ip_versions[MP0_HWIP][0]) {
2281+
case IP_VERSION(13, 0, 0):
2282+
case IP_VERSION(13, 0, 10):
2283+
return true;
2284+
default:
2285+
return false;
2286+
}
2287+
}
2288+
22712289
return adev->asic_type == CHIP_VEGA10 ||
22722290
adev->asic_type == CHIP_VEGA20 ||
22732291
adev->asic_type == CHIP_ARCTURUS ||
@@ -2311,11 +2329,6 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
23112329
!amdgpu_ras_asic_supported(adev))
23122330
return;
23132331

2314-
/* If driver run on sriov guest side, only enable ras for aldebaran */
2315-
if (amdgpu_sriov_vf(adev) &&
2316-
adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2))
2317-
return;
2318-
23192332
if (!adev->gmc.xgmi.connected_to_cpu) {
23202333
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
23212334
dev_info(adev->dev, "MEM ECC is active.\n");

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
3737
{
3838
int ret = 0;
3939

40-
adev->amdgpu_reset_level_mask = 0x1;
41-
4240
switch (adev->ip_versions[MP1_HWIP][0]) {
4341
case IP_VERSION(13, 0, 2):
4442
ret = aldebaran_reset_init(adev);
@@ -76,12 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
7674
{
7775
struct amdgpu_reset_handler *reset_handler = NULL;
7876

79-
if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
80-
return -ENOSYS;
81-
82-
if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
83-
return -ENOSYS;
84-
8577
if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
8678
reset_handler = adev->reset_cntl->get_reset_handler(
8779
adev->reset_cntl, reset_context);
@@ -98,12 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
9890
int ret;
9991
struct amdgpu_reset_handler *reset_handler = NULL;
10092

101-
if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
102-
return -ENOSYS;
103-
104-
if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
105-
return -ENOSYS;
106-
10793
if (adev->reset_cntl)
10894
reset_handler = adev->reset_cntl->get_reset_handler(
10995
adev->reset_cntl, reset_context);

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ enum AMDGPU_RESET_FLAGS {
3030

3131
AMDGPU_NEED_FULL_RESET = 0,
3232
AMDGPU_SKIP_HW_RESET = 1,
33-
AMDGPU_SKIP_MODE2_RESET = 2,
34-
AMDGPU_RESET_FOR_DEVICE_REMOVE = 3,
33+
AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
3534
};
3635

3736
struct amdgpu_reset_context {

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,9 +405,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
405405
{
406406
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
407407

408-
if (!(ring->adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY))
409-
return false;
410-
411408
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
412409
return false;
413410

0 commit comments

Comments
 (0)