Skip to content

Commit e35184f

Browse files
committed
Merge tag 'drm-fixes-2022-10-21' of git://anongit.freedesktop.org/drm/drm
Pull drm fixes from Dave Airlie: "Usual fixes for the week. The amdgpu contains fixes for two regressions, one reported in response to rc1 which broke on SI GPUs, and one gfx9 APU regression. Otherwise it's mostly fixes for new IP, and some GPU reset fixes. vc4 is just HDMI fixes, and panfrost has some minor type fixes. Core: - fix connector DDC pointer - fix buffer overflow in format_helper_test amdgpu: - Mode2 reset fixes for Sienna Cichlid - Revert broken fan speed sensor fix - SMU 13.x fixes - GC 11.x fixes - RAS fixes - SR-IOV fixes - Fix BO move breakage on SI - Misc compiler fixes - Fix gfx9 APU regression caused by PCI AER fix vc4: - HDMI fixes panfrost: - compiler fixes" * tag 'drm-fixes-2022-10-21' of git://anongit.freedesktop.org/drm/drm: (35 commits) drm/amdgpu: fix sdma doorbell init ordering on APUs drm/panfrost: replace endian-specific types with native ones drm/panfrost: Remove type name from internal structs drm/connector: Set DDC pointer in drmm_connector_init drm: tests: Fix a buffer overflow in format_helper_test drm/amdgpu: use DRM_SCHED_FENCE_DONT_PIPELINE for VM updates drm/sched: add DRM_SCHED_FENCE_DONT_PIPELINE flag drm/amdgpu: Fix for BO move issue drm/amdgpu: dequeue mes scheduler during fini drm/amd/pm: enable thermal alert on smu_v13_0_10 drm/amdgpu: Program GC registers through RLCG interface in gfx_v11/gmc_v11 drm/amdkfd: Fix type of reset_type parameter in hqd_destroy() callback drm/amd/display: Increase frame size limit for display_mode_vba_util_32.o drm/amd/pm: add SMU IP v13.0.4 IF version define to V7 drm/amd/pm: update SMU IP v13.0.4 driver interface version drm/amd/pm: Init pm_attr_list when dpm is disabled drm/amd/pm: disable cstate feature for gpu reset scenario drm/amd/pm: fulfill SMU13.0.7 cstate control interface drm/amd/pm: fulfill SMU13.0.0 cstate control interface drm/amdgpu: Add sriov vf ras support in amdgpu_ras_asic_supported ...
2 parents 6d36c72 + cbc543c commit e35184f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+314
-134
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,9 +274,6 @@ extern int amdgpu_vcnfw_log;
274274
#define AMDGPU_RESET_VCE (1 << 13)
275275
#define AMDGPU_RESET_VCE1 (1 << 14)
276276

277-
#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0)
278-
#define AMDGPU_RESET_LEVEL_MODE2 (1 << 1)
279-
280277
/* max cursor sizes (in pixels) */
281278
#define CIK_CURSOR_WIDTH 128
282279
#define CIK_CURSOR_HEIGHT 128
@@ -1065,7 +1062,6 @@ struct amdgpu_device {
10651062

10661063
struct work_struct reset_work;
10671064

1068-
uint32_t amdgpu_reset_level_mask;
10691065
bool job_hang;
10701066
};
10711067

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
134134
reset_context.method = AMD_RESET_METHOD_NONE;
135135
reset_context.reset_req_dev = adev;
136136
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
137-
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
138137

139138
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
140139
}

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
111111

112112
lock_srbm(adev, mec, pipe, 0, 0);
113113

114-
WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
114+
WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
115115
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
116116
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
117117

drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,8 +1954,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
19541954
return PTR_ERR(ent);
19551955
}
19561956

1957-
debugfs_create_u32("amdgpu_reset_level", 0600, root, &adev->amdgpu_reset_level_mask);
1958-
19591957
/* Register debugfs entries for amdgpu_ttm */
19601958
amdgpu_ttm_debugfs_init(adev);
19611959
amdgpu_debugfs_pm_init(adev);

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2928,6 +2928,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
29282928
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
29292929
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
29302930

2931+
/*
2932+
* Per PMFW team's suggestion, driver needs to handle gfxoff
2933+
* and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2934+
* scenario. Add the missing df cstate disablement here.
2935+
*/
2936+
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2937+
dev_warn(adev->dev, "Failed to disallow df cstate");
2938+
29312939
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
29322940
if (!adev->ip_blocks[i].status.valid)
29332941
continue;
@@ -5210,7 +5218,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
52105218

52115219
reset_context->job = job;
52125220
reset_context->hive = hive;
5213-
52145221
/*
52155222
* Build list of devices to reset.
52165223
* In case we are in XGMI hive mode, resort the device list
@@ -5337,11 +5344,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
53375344
amdgpu_ras_resume(adev);
53385345
} else {
53395346
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5340-
if (r && r == -EAGAIN) {
5341-
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags);
5342-
adev->asic_reset_res = 0;
5347+
if (r && r == -EAGAIN)
53435348
goto retry;
5344-
}
53455349

53465350
if (!r && gpu_reset_for_dev_remove)
53475351
goto recover_end;
@@ -5777,7 +5781,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
57775781
reset_context.reset_req_dev = adev;
57785782
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
57795783
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5780-
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
57815784

57825785
adev->no_hw_access = true;
57835786
r = amdgpu_device_pre_asic_reset(adev, &reset_context);

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
7272
reset_context.method = AMD_RESET_METHOD_NONE;
7373
reset_context.reset_req_dev = adev;
7474
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
75-
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
7675

7776
r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
7877
if (r)

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
19501950
reset_context.method = AMD_RESET_METHOD_NONE;
19511951
reset_context.reset_req_dev = adev;
19521952
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
1953-
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
19541953

19551954
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
19561955
}
@@ -2268,6 +2267,25 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
22682267

22692268
static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
22702269
{
2270+
if (amdgpu_sriov_vf(adev)) {
2271+
switch (adev->ip_versions[MP0_HWIP][0]) {
2272+
case IP_VERSION(13, 0, 2):
2273+
return true;
2274+
default:
2275+
return false;
2276+
}
2277+
}
2278+
2279+
if (adev->asic_type == CHIP_IP_DISCOVERY) {
2280+
switch (adev->ip_versions[MP0_HWIP][0]) {
2281+
case IP_VERSION(13, 0, 0):
2282+
case IP_VERSION(13, 0, 10):
2283+
return true;
2284+
default:
2285+
return false;
2286+
}
2287+
}
2288+
22712289
return adev->asic_type == CHIP_VEGA10 ||
22722290
adev->asic_type == CHIP_VEGA20 ||
22732291
adev->asic_type == CHIP_ARCTURUS ||
@@ -2311,11 +2329,6 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
23112329
!amdgpu_ras_asic_supported(adev))
23122330
return;
23132331

2314-
/* If driver run on sriov guest side, only enable ras for aldebaran */
2315-
if (amdgpu_sriov_vf(adev) &&
2316-
adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2))
2317-
return;
2318-
23192332
if (!adev->gmc.xgmi.connected_to_cpu) {
23202333
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
23212334
dev_info(adev->dev, "MEM ECC is active.\n");

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
3737
{
3838
int ret = 0;
3939

40-
adev->amdgpu_reset_level_mask = 0x1;
41-
4240
switch (adev->ip_versions[MP1_HWIP][0]) {
4341
case IP_VERSION(13, 0, 2):
4442
ret = aldebaran_reset_init(adev);
@@ -76,12 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
7674
{
7775
struct amdgpu_reset_handler *reset_handler = NULL;
7876

79-
if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
80-
return -ENOSYS;
81-
82-
if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
83-
return -ENOSYS;
84-
8577
if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
8678
reset_handler = adev->reset_cntl->get_reset_handler(
8779
adev->reset_cntl, reset_context);
@@ -98,12 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
9890
int ret;
9991
struct amdgpu_reset_handler *reset_handler = NULL;
10092

101-
if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
102-
return -ENOSYS;
103-
104-
if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
105-
return -ENOSYS;
106-
10793
if (adev->reset_cntl)
10894
reset_handler = adev->reset_cntl->get_reset_handler(
10995
adev->reset_cntl, reset_context);

drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ enum AMDGPU_RESET_FLAGS {
3030

3131
AMDGPU_NEED_FULL_RESET = 0,
3232
AMDGPU_SKIP_HW_RESET = 1,
33-
AMDGPU_SKIP_MODE2_RESET = 2,
34-
AMDGPU_RESET_FOR_DEVICE_REMOVE = 3,
33+
AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
3534
};
3635

3736
struct amdgpu_reset_context {

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,9 +405,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
405405
{
406406
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
407407

408-
if (!(ring->adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY))
409-
return false;
410-
411408
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
412409
return false;
413410

0 commit comments

Comments
 (0)