Skip to content

Commit 786cb0a

Browse files
committed
Merge tag 'drm-fixes-2021-07-16' of git://anongit.freedesktop.org/drm/drm
Pull drm fixes from Dave Airlie:
 "Regular rc2 fixes though a bit more than usual at rc2 stage, people
  must have been testing early or else some fixes from last week got a
  bit laggy.

  There is one larger change in the amd fixes to amalgamate some power
  management code on the newer chips with the code from the older chips,
  it should only affect chips where support was introduced in rc1 and it
  should make future fixes easier to maintain probably a good idea to
  merge it now.

  Otherwise it's mostly fixes across the board.

  dma-buf:
   - Fix fence leak in sync_file_merge() error code

  drm/panel:
   - nt35510: Don't fail on DSI reads

  fbdev:
   - Avoid use-after-free by not deleting current video mode

  ttm:
   - Avoid NULL-ptr deref in ttm_range_man_fini()

  vmwgfx:
   - Fix a merge commit

  qxl:
   - fix a TTM regression

  amdgpu:
   - SR-IOV fixes
   - RAS fixes
   - eDP fixes
   - SMU13 code unification to facilitate fixes in the future
   - Add new renoir DID
   - Yellow Carp fixes
   - Beige Goby fixes
   - Revert a bunch of TLB fixes that caused regressions
   - Revert an LTTPR display regression

  amdkfd:
   - Fix VRAM access regression
   - SVM fixes

  i915:
   - Fix -EDEADLK handling regression
   - Drop the page table optimisation"

* tag 'drm-fixes-2021-07-16' of git://anongit.freedesktop.org/drm/drm: (29 commits)
  drm/amdgpu: add another Renoir DID
  drm/ttm: add a check against null pointer dereference
  drm/i915/gtt: drop the page table optimisation
  drm/i915/gt: Fix -EDEADLK handling regression
  drm/amd/pm: Add waiting for response of mode-reset message for yellow carp
  Revert "drm/amdkfd: Add heavy-weight TLB flush after unmapping"
  Revert "drm/amdgpu: Add table_freed parameter to amdgpu_vm_bo_update"
  Revert "drm/amdkfd: Make TLB flush conditional on mapping"
  Revert "drm/amdgpu: Fix warning of Function parameter or member not described"
  Revert "drm/amdkfd: Add memory sync before TLB flush on unmap"
  drm/amd/pm: Fix BACO state setting for Beige_Goby
  drm/amdgpu: Restore msix after FLR
  drm/amdkfd: Allow CPU access for all VRAM BOs
  drm/amdgpu/display - only update eDP's backlight level when necessary
  drm/amdkfd: handle fault counters on invalid address
  drm/amdgpu: Correct the irq numbers for virtual crtc
  drm/amd/display: update header file name
  drm/amd/pm: drop smu_v13_0_1.c|h files for yellow carp
  drm/amd/display: remove faulty assert
  Revert "drm/amd/display: Always write repeater mode regardless of LTTPR"
  ...
2 parents 6e442d0 + 876d98e commit 786cb0a

38 files changed

+202
-1395
lines changed

drivers/dma-buf/sync_file.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
211211
struct sync_file *b)
212212
{
213213
struct sync_file *sync_file;
214-
struct dma_fence **fences, **nfences, **a_fences, **b_fences;
215-
int i, i_a, i_b, num_fences, a_num_fences, b_num_fences;
214+
struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences;
215+
int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences;
216216

217217
sync_file = sync_file_alloc();
218218
if (!sync_file)
@@ -236,7 +236,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
236236
* If a sync_file can only be created with sync_file_merge
237237
* and sync_file_create, this is a reasonable assumption.
238238
*/
239-
for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) {
239+
for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) {
240240
struct dma_fence *pt_a = a_fences[i_a];
241241
struct dma_fence *pt_b = b_fences[i_b];
242242

@@ -277,15 +277,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
277277
fences = nfences;
278278
}
279279

280-
if (sync_file_set_fence(sync_file, fences, i) < 0) {
281-
kfree(fences);
280+
if (sync_file_set_fence(sync_file, fences, i) < 0)
282281
goto err;
283-
}
284282

285283
strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name));
286284
return sync_file;
287285

288286
err:
287+
while (i)
288+
dma_fence_put(fences[--i]);
289+
kfree(fences);
289290
fput(sync_file->file);
290291
return NULL;
291292

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
269269
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
270270
uint64_t *size);
271271
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
272-
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed);
272+
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
273273
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
274274
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
275275
int amdgpu_amdkfd_gpuvm_sync_memory(

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,8 +1057,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
10571057

10581058
static int update_gpuvm_pte(struct kgd_mem *mem,
10591059
struct kfd_mem_attachment *entry,
1060-
struct amdgpu_sync *sync,
1061-
bool *table_freed)
1060+
struct amdgpu_sync *sync)
10621061
{
10631062
struct amdgpu_bo_va *bo_va = entry->bo_va;
10641063
struct amdgpu_device *adev = entry->adev;
@@ -1069,7 +1068,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
10691068
return ret;
10701069

10711070
/* Update the page tables */
1072-
ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed);
1071+
ret = amdgpu_vm_bo_update(adev, bo_va, false);
10731072
if (ret) {
10741073
pr_err("amdgpu_vm_bo_update failed\n");
10751074
return ret;
@@ -1081,8 +1080,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
10811080
static int map_bo_to_gpuvm(struct kgd_mem *mem,
10821081
struct kfd_mem_attachment *entry,
10831082
struct amdgpu_sync *sync,
1084-
bool no_update_pte,
1085-
bool *table_freed)
1083+
bool no_update_pte)
10861084
{
10871085
int ret;
10881086

@@ -1099,7 +1097,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
10991097
if (no_update_pte)
11001098
return 0;
11011099

1102-
ret = update_gpuvm_pte(mem, entry, sync, table_freed);
1100+
ret = update_gpuvm_pte(mem, entry, sync);
11031101
if (ret) {
11041102
pr_err("update_gpuvm_pte() failed\n");
11051103
goto update_gpuvm_pte_failed;
@@ -1393,8 +1391,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
13931391
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
13941392
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
13951393
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
1396-
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
1397-
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
1394+
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
13981395
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
13991396
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
14001397
alloc_flags = 0;
@@ -1597,8 +1594,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
15971594
}
15981595

15991596
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1600-
struct kgd_dev *kgd, struct kgd_mem *mem,
1601-
void *drm_priv, bool *table_freed)
1597+
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
16021598
{
16031599
struct amdgpu_device *adev = get_amdgpu_device(kgd);
16041600
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1686,7 +1682,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
16861682
entry->va, entry->va + bo_size, entry);
16871683

16881684
ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
1689-
is_invalid_userptr, table_freed);
1685+
is_invalid_userptr);
16901686
if (ret) {
16911687
pr_err("Failed to map bo to gpuvm\n");
16921688
goto out_unreserve;
@@ -2136,7 +2132,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
21362132
continue;
21372133

21382134
kfd_mem_dmaunmap_attachment(mem, attachment);
2139-
ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
2135+
ret = update_gpuvm_pte(mem, attachment, &sync);
21402136
if (ret) {
21412137
pr_err("%s: update PTE failed\n", __func__);
21422138
/* make sure this gets validated again */
@@ -2342,7 +2338,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
23422338
continue;
23432339

23442340
kfd_mem_dmaunmap_attachment(mem, attachment);
2345-
ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
2341+
ret = update_gpuvm_pte(mem, attachment, &sync_obj);
23462342
if (ret) {
23472343
pr_debug("Memory eviction: update PTE failed. Try again\n");
23482344
goto validate_map_fail;

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
781781
if (r)
782782
return r;
783783

784-
r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
784+
r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
785785
if (r)
786786
return r;
787787

@@ -792,7 +792,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
792792
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
793793
bo_va = fpriv->csa_va;
794794
BUG_ON(!bo_va);
795-
r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
795+
r = amdgpu_vm_bo_update(adev, bo_va, false);
796796
if (r)
797797
return r;
798798

@@ -811,7 +811,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
811811
if (bo_va == NULL)
812812
continue;
813813

814-
r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
814+
r = amdgpu_vm_bo_update(adev, bo_va, false);
815815
if (r)
816816
return r;
817817

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,6 +1168,7 @@ static const struct pci_device_id pciidlist[] = {
11681168
{0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
11691169

11701170
/* Renoir */
1171+
{0x1002, 0x15E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
11711172
{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
11721173
{0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
11731174
{0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},

drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
612612

613613
if (operation == AMDGPU_VA_OP_MAP ||
614614
operation == AMDGPU_VA_OP_REPLACE) {
615-
r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
615+
r = amdgpu_vm_bo_update(adev, bo_va, false);
616616
if (r)
617617
goto error;
618618
}

drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,21 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
278278
return true;
279279
}
280280

281+
static void amdgpu_restore_msix(struct amdgpu_device *adev)
282+
{
283+
u16 ctrl;
284+
285+
pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
286+
if (!(ctrl & PCI_MSIX_FLAGS_ENABLE))
287+
return;
288+
289+
/* VF FLR */
290+
ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
291+
pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
292+
ctrl |= PCI_MSIX_FLAGS_ENABLE;
293+
pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
294+
}
295+
281296
/**
282297
* amdgpu_irq_init - initialize interrupt handling
283298
*
@@ -569,6 +584,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
569584
{
570585
int i, j, k;
571586

587+
if (amdgpu_sriov_vf(adev))
588+
amdgpu_restore_msix(adev);
589+
572590
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
573591
if (!adev->irq.client[i].sources)
574592
continue;

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,7 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
809809

810810
/* query/inject/cure begin */
811811
int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
812-
struct ras_query_if *info)
812+
struct ras_query_if *info)
813813
{
814814
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
815815
struct ras_err_data err_data = {0, 0, 0, NULL};
@@ -1043,27 +1043,44 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
10431043
return ret;
10441044
}
10451045

1046-
/* get the total error counts on all IPs */
1047-
void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
1048-
unsigned long *ce_count,
1049-
unsigned long *ue_count)
1046+
/**
1047+
* amdgpu_ras_query_error_count -- Get error counts of all IPs
1048+
* adev: pointer to AMD GPU device
1049+
* ce_count: pointer to an integer to be set to the count of correctible errors.
1050+
* ue_count: pointer to an integer to be set to the count of uncorrectible
1051+
* errors.
1052+
*
1053+
* If set, @ce_count or @ue_count, count and return the corresponding
1054+
* error counts in those integer pointers. Return 0 if the device
1055+
* supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS.
1056+
*/
1057+
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
1058+
unsigned long *ce_count,
1059+
unsigned long *ue_count)
10501060
{
10511061
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
10521062
struct ras_manager *obj;
10531063
unsigned long ce, ue;
10541064

10551065
if (!adev->ras_enabled || !con)
1056-
return;
1066+
return -EOPNOTSUPP;
1067+
1068+
/* Don't count since no reporting.
1069+
*/
1070+
if (!ce_count && !ue_count)
1071+
return 0;
10571072

10581073
ce = 0;
10591074
ue = 0;
10601075
list_for_each_entry(obj, &con->head, node) {
10611076
struct ras_query_if info = {
10621077
.head = obj->head,
10631078
};
1079+
int res;
10641080

1065-
if (amdgpu_ras_query_error_status(adev, &info))
1066-
return;
1081+
res = amdgpu_ras_query_error_status(adev, &info);
1082+
if (res)
1083+
return res;
10671084

10681085
ce += info.ce_count;
10691086
ue += info.ue_count;
@@ -1074,6 +1091,8 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
10741091

10751092
if (ue_count)
10761093
*ue_count = ue;
1094+
1095+
return 0;
10771096
}
10781097
/* query/inject/cure end */
10791098

@@ -2137,9 +2156,10 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
21372156

21382157
/* Cache new values.
21392158
*/
2140-
amdgpu_ras_query_error_count(adev, &ce_count, &ue_count);
2141-
atomic_set(&con->ras_ce_count, ce_count);
2142-
atomic_set(&con->ras_ue_count, ue_count);
2159+
if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) {
2160+
atomic_set(&con->ras_ce_count, ce_count);
2161+
atomic_set(&con->ras_ue_count, ue_count);
2162+
}
21432163

21442164
pm_runtime_mark_last_busy(dev->dev);
21452165
Out:
@@ -2312,9 +2332,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
23122332

23132333
/* Those are the cached values at init.
23142334
*/
2315-
amdgpu_ras_query_error_count(adev, &ce_count, &ue_count);
2316-
atomic_set(&con->ras_ce_count, ce_count);
2317-
atomic_set(&con->ras_ue_count, ue_count);
2335+
if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) {
2336+
atomic_set(&con->ras_ce_count, ce_count);
2337+
atomic_set(&con->ras_ue_count, ue_count);
2338+
}
23182339

23192340
return 0;
23202341
cleanup:

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -490,9 +490,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
490490
void amdgpu_ras_resume(struct amdgpu_device *adev);
491491
void amdgpu_ras_suspend(struct amdgpu_device *adev);
492492

493-
void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
494-
unsigned long *ce_count,
495-
unsigned long *ue_count);
493+
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
494+
unsigned long *ce_count,
495+
unsigned long *ue_count);
496496

497497
/* error handling functions */
498498
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,7 +1758,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
17581758
r = vm->update_funcs->commit(&params, fence);
17591759

17601760
if (table_freed)
1761-
*table_freed = *table_freed || params.table_freed;
1761+
*table_freed = params.table_freed;
17621762

17631763
error_unlock:
17641764
amdgpu_vm_eviction_unlock(vm);
@@ -1816,15 +1816,14 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
18161816
* @adev: amdgpu_device pointer
18171817
* @bo_va: requested BO and VM object
18181818
* @clear: if true clear the entries
1819-
* @table_freed: return true if page table is freed
18201819
*
18211820
* Fill in the page table entries for @bo_va.
18221821
*
18231822
* Returns:
18241823
* 0 for success, -EINVAL for failure.
18251824
*/
18261825
int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
1827-
bool clear, bool *table_freed)
1826+
bool clear)
18281827
{
18291828
struct amdgpu_bo *bo = bo_va->base.bo;
18301829
struct amdgpu_vm *vm = bo_va->base.vm;
@@ -1903,7 +1902,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
19031902
resv, mapping->start,
19041903
mapping->last, update_flags,
19051904
mapping->offset, mem,
1906-
pages_addr, last_update, table_freed);
1905+
pages_addr, last_update, NULL);
19071906
if (r)
19081907
return r;
19091908
}
@@ -2155,7 +2154,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
21552154

21562155
list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
21572156
/* Per VM BOs never need to be cleared in the page tables */
2158-
r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
2157+
r = amdgpu_vm_bo_update(adev, bo_va, false);
21592158
if (r)
21602159
return r;
21612160
}
@@ -2174,7 +2173,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
21742173
else
21752174
clear = true;
21762175

2177-
r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
2176+
r = amdgpu_vm_bo_update(adev, bo_va, clear);
21782177
if (r)
21792178
return r;
21802179

0 commit comments

Comments
 (0)