Skip to content

Commit 377b5b3

Browse files
committed
Merge tag 'amd-drm-next-6.10-2024-04-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.10-2024-04-19:

amdgpu:
- DC resource allocation logic updates
- DC IPS fixes
- DC YUV fixes
- DMCUB fixes
- DML2 fixes
- Devcoredump updates
- USB-C DSC fix
- Misc display code cleanups
- PSR fixes
- MES timeout fix
- RAS updates
- UAF fix in VA IOCTL
- Fix visible VRAM handling during faults
- Fix IP discovery handling during PCI rescans
- Misc code cleanups
- PSP 14 updates
- More runtime PM code rework
- SMU 14.0.2 support
- GPUVM page fault redirection to secondary IH rings for IH 6.x
- Suspend/resume fixes
- SR-IOV fixes

amdkfd:
- Fix eviction fence handling
- Fix leak in GPU memory allocation failure case
- DMABuf import handling fix

radeon:
- Silence UBSAN warnings related to flexible arrays

Signed-off-by: Dave Airlie <[email protected]>
From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents fad3dad + 81bf145 commit 377b5b3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+4589
-344
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,7 @@ bool amdgpu_device_supports_px(struct drm_device *dev);
14091409
bool amdgpu_device_supports_boco(struct drm_device *dev);
14101410
bool amdgpu_device_supports_smart_shift(struct drm_device *dev);
14111411
int amdgpu_device_supports_baco(struct drm_device *dev);
1412+
void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev);
14121413
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
14131414
struct amdgpu_device *peer_adev);
14141415
int amdgpu_device_baco_enter(struct drm_device *dev);

drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -753,23 +753,13 @@ int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
753753

754754
static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
755755
{
756-
int error_code;
757-
758-
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
759-
case IP_VERSION(13, 0, 6):
760-
if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) {
761-
error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]);
762-
return error_code & 0xff;
763-
}
764-
break;
765-
default:
766-
break;
767-
}
756+
struct amdgpu_aca *aca = &adev->aca;
757+
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
768758

769-
/* NOTE: the true error code is encoded in status.errorcode[0:7] */
770-
error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]);
759+
if (!smu_funcs || !smu_funcs->parse_error_code)
760+
return -EOPNOTSUPP;
771761

772-
return error_code & 0xff;
762+
return smu_funcs->parse_error_code(adev, bank);
773763
}
774764

775765
int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
@@ -780,6 +770,9 @@ int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank
780770
return -EINVAL;
781771

782772
error_code = aca_bank_get_error_code(adev, bank);
773+
if (error_code < 0)
774+
return error_code;
775+
783776
for (i = 0; i < size; i++) {
784777
if (err_codes[i] == error_code)
785778
return 0;

drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ struct aca_smu_funcs {
173173
int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
174174
int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
175175
int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
176+
int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank);
176177
};
177178

178179
struct amdgpu_aca {

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1854,6 +1854,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
18541854
err_bo_create:
18551855
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
18561856
err_reserve_limit:
1857+
amdgpu_sync_free(&(*mem)->sync);
18571858
mutex_destroy(&(*mem)->lock);
18581859
if (gobj)
18591860
drm_gem_object_put(gobj);
@@ -2900,13 +2901,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
29002901

29012902
amdgpu_sync_create(&sync_obj);
29022903

2903-
/* Validate BOs and map them to GPUVM (update VM page tables). */
2904+
/* Validate BOs managed by KFD */
29042905
list_for_each_entry(mem, &process_info->kfd_bo_list,
29052906
validate_list) {
29062907

29072908
struct amdgpu_bo *bo = mem->bo;
29082909
uint32_t domain = mem->domain;
2909-
struct kfd_mem_attachment *attachment;
29102910
struct dma_resv_iter cursor;
29112911
struct dma_fence *fence;
29122912

@@ -2931,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
29312931
goto validate_map_fail;
29322932
}
29332933
}
2934+
}
2935+
2936+
if (failed_size)
2937+
pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
2938+
2939+
/* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
2940+
* validations above would invalidate DMABuf imports again.
2941+
*/
2942+
ret = process_validate_vms(process_info, &exec.ticket);
2943+
if (ret) {
2944+
pr_debug("Validating VMs failed, ret: %d\n", ret);
2945+
goto validate_map_fail;
2946+
}
2947+
2948+
/* Update mappings managed by KFD. */
2949+
list_for_each_entry(mem, &process_info->kfd_bo_list,
2950+
validate_list) {
2951+
struct kfd_mem_attachment *attachment;
2952+
29342953
list_for_each_entry(attachment, &mem->attachments, list) {
29352954
if (!attachment->is_mapped)
29362955
continue;
@@ -2947,18 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
29472966
}
29482967
}
29492968

2950-
if (failed_size)
2951-
pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
2952-
2953-
/* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
2954-
* validations above would invalidate DMABuf imports again.
2955-
*/
2956-
ret = process_validate_vms(process_info, &exec.ticket);
2957-
if (ret) {
2958-
pr_debug("Validating VMs failed, ret: %d\n", ret);
2959-
goto validate_map_fail;
2960-
}
2961-
29622969
/* Update mappings not managed by KFD */
29632970
list_for_each_entry(peer_vm, &process_info->vm_list_head,
29642971
vm_list_node) {

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
819819

820820
p->bytes_moved += ctx.bytes_moved;
821821
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
822-
amdgpu_bo_in_cpu_visible_vram(bo))
822+
amdgpu_res_cpu_visible(adev, bo->tbo.resource))
823823
p->bytes_moved_vis += ctx.bytes_moved;
824824

825825
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {

drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,11 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
188188
adev->vpe.feature_version, adev->vpe.fw_version);
189189

190190
drm_printf(p, "\nVBIOS Information\n");
191-
drm_printf(p, "name: %s\n", ctx->name);
192-
drm_printf(p, "pn %s\n", ctx->vbios_pn);
193-
drm_printf(p, "version: %s\n", ctx->vbios_ver_str);
194-
drm_printf(p, "date: %s\n", ctx->date);
191+
drm_printf(p, "vbios name : %s\n", ctx->name);
192+
drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn);
193+
drm_printf(p, "vbios version : %d\n", ctx->version);
194+
drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str);
195+
drm_printf(p, "vbios date : %s\n", ctx->date);
195196
}
196197

197198
static ssize_t

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,81 @@ int amdgpu_device_supports_baco(struct drm_device *dev)
350350
return amdgpu_asic_supports_baco(adev);
351351
}
352352

353+
void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
354+
{
355+
struct drm_device *dev;
356+
int bamaco_support;
357+
358+
dev = adev_to_drm(adev);
359+
360+
adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
361+
bamaco_support = amdgpu_device_supports_baco(dev);
362+
363+
switch (amdgpu_runtime_pm) {
364+
case 2:
365+
if (bamaco_support & MACO_SUPPORT) {
366+
adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
367+
dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
368+
} else if (bamaco_support == BACO_SUPPORT) {
369+
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
370+
dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
371+
}
372+
break;
373+
case 1:
374+
if (bamaco_support & BACO_SUPPORT) {
375+
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
376+
dev_info(adev->dev, "Forcing BACO for runtime pm\n");
377+
}
378+
break;
379+
case -1:
380+
case -2:
381+
if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
382+
adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
383+
dev_info(adev->dev, "Using ATPX for runtime pm\n");
384+
} else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
385+
adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
386+
dev_info(adev->dev, "Using BOCO for runtime pm\n");
387+
} else {
388+
if (!bamaco_support)
389+
goto no_runtime_pm;
390+
391+
switch (adev->asic_type) {
392+
case CHIP_VEGA20:
393+
case CHIP_ARCTURUS:
394+
/* BACO is not supported on vega20 and arcturus */
395+
break;
396+
case CHIP_VEGA10:
397+
/* enable BACO as runpm mode if noretry=0 */
398+
if (!adev->gmc.noretry)
399+
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
400+
break;
401+
default:
402+
/* enable BACO as runpm mode on CI+ */
403+
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
404+
break;
405+
}
406+
407+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
408+
if (bamaco_support & MACO_SUPPORT) {
409+
adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
410+
dev_info(adev->dev, "Using BAMACO for runtime pm\n");
411+
} else {
412+
dev_info(adev->dev, "Using BACO for runtime pm\n");
413+
}
414+
}
415+
}
416+
break;
417+
case 0:
418+
dev_info(adev->dev, "runtime pm is manually disabled\n");
419+
break;
420+
default:
421+
break;
422+
}
423+
424+
no_runtime_pm:
425+
if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
426+
dev_info(adev->dev, "Runtime PM not available\n");
427+
}
353428
/**
354429
* amdgpu_device_supports_smart_shift - Is the device dGPU with
355430
* smart shift support
@@ -1460,7 +1535,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
14601535

14611536
/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
14621537
if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1463-
DRM_WARN("System can't access extended configuration space,please check!!\n");
1538+
DRM_WARN("System can't access extended configuration space, please check!!\n");
14641539

14651540
/* skip if the bios has already enabled large BAR */
14661541
if (adev->gmc.real_vram_size &&
@@ -5282,7 +5357,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
52825357
/* Try reset handler method first */
52835358
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
52845359
reset_list);
5285-
amdgpu_reset_reg_dumps(tmp_adev);
5360+
5361+
if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5362+
amdgpu_reset_reg_dumps(tmp_adev);
52865363

52875364
reset_context->reset_device_list = device_list_handle;
52885365
r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
@@ -5355,7 +5432,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
53555432

53565433
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
53575434

5358-
amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5435+
if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5436+
amdgpu_coredump(tmp_adev, vram_lost, reset_context);
53595437

53605438
if (vram_lost) {
53615439
DRM_INFO("VRAM is lost due to GPU reset!\n");

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,6 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
255255
uint64_t vram_size;
256256
u32 msg;
257257
int i, ret = 0;
258-
int ip_discovery_ver = 0;
259258

260259
/* It can take up to a second for IFWI init to complete on some dGPUs,
261260
* but generally it should be in the 60-100ms range. Normally this starts
@@ -265,17 +264,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
265264
* continue.
266265
*/
267266

268-
ip_discovery_ver = RREG32(mmIP_DISCOVERY_VERSION);
269-
if ((dev_is_removable(&adev->pdev->dev)) ||
270-
(ip_discovery_ver == IP_DISCOVERY_V2) ||
271-
(ip_discovery_ver == IP_DISCOVERY_V4)) {
272-
for (i = 0; i < 1000; i++) {
273-
msg = RREG32(mmMP0_SMN_C2PMSG_33);
274-
if (msg & 0x80000000)
275-
break;
276-
msleep(1);
277-
}
267+
for (i = 0; i < 1000; i++) {
268+
msg = RREG32(mmMP0_SMN_C2PMSG_33);
269+
if (msg & 0x80000000)
270+
break;
271+
usleep_range(1000, 1100);
278272
}
273+
279274
vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
280275

281276
if (vram_size) {
@@ -1906,6 +1901,8 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
19061901
break;
19071902
case IP_VERSION(14, 0, 0):
19081903
case IP_VERSION(14, 0, 1):
1904+
case IP_VERSION(14, 0, 2):
1905+
case IP_VERSION(14, 0, 3):
19091906
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
19101907
break;
19111908
default:

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2481,6 +2481,7 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
24812481

24822482
/* Use a common context, just need to make sure full reset is done */
24832483
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
2484+
set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
24842485
r = amdgpu_do_asic_reset(&device_list, &reset_context);
24852486

24862487
if (r) {

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
133133
int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
134134
{
135135
struct drm_device *dev;
136-
int bamaco_support = 0;
137136
int r, acpi_status;
138137

139138
dev = adev_to_drm(adev);
@@ -150,52 +149,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
150149
goto out;
151150
}
152151

153-
adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
154-
if (amdgpu_device_supports_px(dev) &&
155-
(amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */
156-
adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
157-
dev_info(adev->dev, "Using ATPX for runtime pm\n");
158-
} else if (amdgpu_device_supports_boco(dev) &&
159-
(amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */
160-
adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
161-
dev_info(adev->dev, "Using BOCO for runtime pm\n");
162-
} else if (amdgpu_runtime_pm != 0) {
163-
bamaco_support = amdgpu_device_supports_baco(dev);
164-
165-
if (!bamaco_support)
166-
goto no_runtime_pm;
167-
168-
switch (adev->asic_type) {
169-
case CHIP_VEGA20:
170-
case CHIP_ARCTURUS:
171-
/* enable BACO as runpm mode if runpm=1 */
172-
if (amdgpu_runtime_pm > 0)
173-
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
174-
break;
175-
case CHIP_VEGA10:
176-
/* enable BACO as runpm mode if noretry=0 */
177-
if (!adev->gmc.noretry)
178-
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
179-
break;
180-
default:
181-
/* enable BACO as runpm mode on CI+ */
182-
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
183-
break;
184-
}
185-
186-
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
187-
if (bamaco_support & MACO_SUPPORT) {
188-
adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
189-
dev_info(adev->dev, "Using BAMACO for runtime pm\n");
190-
} else {
191-
dev_info(adev->dev, "Using BACO for runtime pm\n");
192-
}
193-
}
194-
}
195-
196-
no_runtime_pm:
197-
if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
198-
dev_info(adev->dev, "NO pm mode for runtime pm\n");
152+
amdgpu_device_detect_runtime_pm_mode(adev);
199153

200154
/* Call ACPI methods: require modeset init
201155
* but failure is not fatal

0 commit comments

Comments
 (0)