Skip to content

Commit e82c98f

Browse files
committed
Merge tag 'amd-drm-next-6.4-2023-04-14' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.4-2023-04-14:

amdgpu:
- S4 fixes for APUs
- GFX11 fixes
- Misc code cleanups
- DCN 3.2 fixes
- DCN 3.1.4 fixes
- FPO/FAMS work to improve display power savings
- DP fixes
- UMC 8.10 code cleanup
- SDMA v4 fix
- GPU clock counter fixes
- SMU 13 fixes
- Sdma v6 invalidation fix for preemption
- RAS fixes
- S0ix fix
- GC 9.4.3 updates

amdkfd:
- Fix user pointers with IOMMU
- Fix coherency flag handling

Signed-off-by: Dave Airlie <[email protected]>
From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents afa351a + 541372b commit e82c98f

File tree

107 files changed

+2118
-730
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+2118
-730
lines changed

drivers/gpu/drm/amd/amdgpu/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ amdgpu-y += \
136136
gfx_v9_0.o \
137137
gfx_v9_4.o \
138138
gfx_v9_4_2.o \
139+
gfx_v9_4_3.o \
139140
gfx_v10_0.o \
140141
imu_v11_0.o \
141142
gfx_v11_0.o \

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ extern char *amdgpu_disable_cu;
185185
extern char *amdgpu_virtual_display;
186186
extern uint amdgpu_pp_feature_mask;
187187
extern uint amdgpu_force_long_training;
188-
extern int amdgpu_job_hang_limit;
189188
extern int amdgpu_lbpw;
190189
extern int amdgpu_compute_multipipe;
191190
extern int amdgpu_gpu_recovery;
@@ -471,7 +470,7 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
471470
/*
472471
* Writeback
473472
*/
474-
#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */
473+
#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
475474

476475
struct amdgpu_wb {
477476
struct amdgpu_bo *wb_obj;
@@ -1222,7 +1221,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
12221221
((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r)))
12231222
#define amdgpu_asic_invalidate_hdp(adev, r) \
12241223
((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : \
1225-
((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : 0))
1224+
((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : (void)0))
12261225
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
12271226
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
12281227
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))

drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,12 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
981981
*/
982982
bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
983983
{
984-
if (adev->flags & AMD_IS_APU)
984+
if ((adev->flags & AMD_IS_APU) &&
985+
adev->gfx.imu.funcs) /* No need to do mode2 reset for IMU-enabled APUs */
986+
return false;
987+
988+
if ((adev->flags & AMD_IS_APU) &&
989+
amdgpu_acpi_is_s3_active(adev))
985990
return false;
986991

987992
if (amdgpu_sriov_vf(adev))

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
9696
size_t *start_offset)
9797
{
9898
/*
99-
* The first num_doorbells are used by amdgpu.
99+
* The first num_kernel_doorbells are used by amdgpu.
100100
* amdkfd takes whatever's left in the aperture.
101101
*/
102102
if (adev->enable_mes) {
@@ -109,11 +109,11 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
109109
*aperture_base = adev->doorbell.base;
110110
*aperture_size = 0;
111111
*start_offset = 0;
112-
} else if (adev->doorbell.size > adev->doorbell.num_doorbells *
112+
} else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
113113
sizeof(u32)) {
114114
*aperture_base = adev->doorbell.base;
115115
*aperture_size = adev->doorbell.size;
116-
*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
116+
*start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
117117
} else {
118118
*aperture_base = 0;
119119
*aperture_size = 0;

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,25 @@ static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
8282
return false;
8383
}
8484

85+
/**
86+
* reuse_dmamap() - Check whether adev can share the original
87+
* userptr BO
88+
*
89+
* If both adev and bo_adev are in direct mapping or
90+
* in the same iommu group, they can share the original BO.
91+
*
92+
* @adev: Device which can or cannot share the original BO
93+
* @bo_adev: Device to which the allocated BO belongs
94+
*
95+
* Return: returns true if adev can share original userptr BO,
96+
* false otherwise.
97+
*/
98+
static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
99+
{
100+
return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) ||
101+
(adev->dev->iommu_group == bo_adev->dev->iommu_group);
102+
}
103+
85104
/* Set memory usage limits. Currently, limits are
86105
* System (TTM + userptr) memory - 15/16th System RAM
87106
* TTM memory - 3/8th System RAM
@@ -253,15 +272,19 @@ create_dmamap_sg_bo(struct amdgpu_device *adev,
253272
struct kgd_mem *mem, struct amdgpu_bo **bo_out)
254273
{
255274
struct drm_gem_object *gem_obj;
256-
int ret, align;
275+
int ret;
276+
uint64_t flags = 0;
257277

258278
ret = amdgpu_bo_reserve(mem->bo, false);
259279
if (ret)
260280
return ret;
261281

262-
align = 1;
263-
ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
264-
AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
282+
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
283+
flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
284+
AMDGPU_GEM_CREATE_UNCACHED);
285+
286+
ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
287+
AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
265288
ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
266289

267290
amdgpu_bo_unreserve(mem->bo);
@@ -481,9 +504,6 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
481504
if (unlikely(ret))
482505
goto release_sg;
483506

484-
drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
485-
ttm->num_pages);
486-
487507
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
488508
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
489509
if (ret)
@@ -805,11 +825,11 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
805825
va + bo_size, vm);
806826

807827
if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
808-
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
809-
same_hive) {
828+
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
829+
same_hive) {
810830
/* Mappings on the local GPU, or VRAM mappings in the
811-
* local hive, or userptr mapping IOMMU direct map mode
812-
* share the original BO
831+
* local hive, or userptr mappings that can reuse the dma map
832+
* address space, share the original BO
813833
*/
814834
attachment[i]->type = KFD_MEM_ATT_SHARED;
815835
bo[i] = mem->bo;

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
602602
if (amdgpu_device_skip_hw_access(adev))
603603
return 0;
604604

605-
if (index < adev->doorbell.num_doorbells) {
605+
if (index < adev->doorbell.num_kernel_doorbells) {
606606
return readl(adev->doorbell.ptr + index);
607607
} else {
608608
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
@@ -625,7 +625,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
625625
if (amdgpu_device_skip_hw_access(adev))
626626
return;
627627

628-
if (index < adev->doorbell.num_doorbells) {
628+
if (index < adev->doorbell.num_kernel_doorbells) {
629629
writel(v, adev->doorbell.ptr + index);
630630
} else {
631631
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
@@ -646,7 +646,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
646646
if (amdgpu_device_skip_hw_access(adev))
647647
return 0;
648648

649-
if (index < adev->doorbell.num_doorbells) {
649+
if (index < adev->doorbell.num_kernel_doorbells) {
650650
return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
651651
} else {
652652
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
@@ -669,7 +669,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
669669
if (amdgpu_device_skip_hw_access(adev))
670670
return;
671671

672-
if (index < adev->doorbell.num_doorbells) {
672+
if (index < adev->doorbell.num_kernel_doorbells) {
673673
atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
674674
} else {
675675
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
@@ -1060,7 +1060,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
10601060
if (adev->asic_type < CHIP_BONAIRE) {
10611061
adev->doorbell.base = 0;
10621062
adev->doorbell.size = 0;
1063-
adev->doorbell.num_doorbells = 0;
1063+
adev->doorbell.num_kernel_doorbells = 0;
10641064
adev->doorbell.ptr = NULL;
10651065
return 0;
10661066
}
@@ -1075,27 +1075,27 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
10751075
adev->doorbell.size = pci_resource_len(adev->pdev, 2);
10761076

10771077
if (adev->enable_mes) {
1078-
adev->doorbell.num_doorbells =
1078+
adev->doorbell.num_kernel_doorbells =
10791079
adev->doorbell.size / sizeof(u32);
10801080
} else {
1081-
adev->doorbell.num_doorbells =
1081+
adev->doorbell.num_kernel_doorbells =
10821082
min_t(u32, adev->doorbell.size / sizeof(u32),
10831083
adev->doorbell_index.max_assignment+1);
1084-
if (adev->doorbell.num_doorbells == 0)
1084+
if (adev->doorbell.num_kernel_doorbells == 0)
10851085
return -EINVAL;
10861086

10871087
/* For Vega, reserve and map two pages on doorbell BAR since SDMA
10881088
* paging queue doorbell use the second page. The
10891089
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
10901090
* doorbells are in the first page. So with paging queue enabled,
1091-
* the max num_doorbells should + 1 page (0x400 in dword)
1091+
* the max num_kernel_doorbells should be increased by 1 page (0x400 in dword)
10921092
*/
10931093
if (adev->asic_type >= CHIP_VEGA10)
1094-
adev->doorbell.num_doorbells += 0x400;
1094+
adev->doorbell.num_kernel_doorbells += 0x400;
10951095
}
10961096

10971097
adev->doorbell.ptr = ioremap(adev->doorbell.base,
1098-
adev->doorbell.num_doorbells *
1098+
adev->doorbell.num_kernel_doorbells *
10991099
sizeof(u32));
11001100
if (adev->doorbell.ptr == NULL)
11011101
return -ENOMEM;
@@ -2184,7 +2184,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
21842184
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
21852185
}
21862186

2187-
amdgpu_amdkfd_device_probe(adev);
21882187

21892188
adev->pm.pp_feature = amdgpu_pp_feature_mask;
21902189
if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
@@ -2240,6 +2239,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
22402239
if (!total)
22412240
return -ENODEV;
22422241

2242+
amdgpu_amdkfd_device_probe(adev);
22432243
adev->cg_flags &= amdgpu_cg_mask;
22442244
adev->pg_flags &= amdgpu_pg_mask;
22452245

@@ -2365,7 +2365,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
23652365
}
23662366

23672367
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2368-
ring->num_hw_submission, amdgpu_job_hang_limit,
2368+
ring->num_hw_submission, 0,
23692369
timeout, adev->reset_domain->wq,
23702370
ring->sched_score, ring->name,
23712371
adev->dev);
@@ -3305,9 +3305,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
33053305
{
33063306
int r;
33073307

3308-
r = amdgpu_amdkfd_resume_iommu(adev);
3309-
if (r)
3310-
return r;
3308+
if (!adev->in_s0ix) {
3309+
r = amdgpu_amdkfd_resume_iommu(adev);
3310+
if (r)
3311+
return r;
3312+
}
33113313

33123314
r = amdgpu_device_ip_resume_phase1(adev);
33133315
if (r)

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1502,6 +1502,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
15021502
case IP_VERSION(9, 4, 0):
15031503
case IP_VERSION(9, 4, 1):
15041504
case IP_VERSION(9, 4, 2):
1505+
case IP_VERSION(9, 4, 3):
15051506
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
15061507
break;
15071508
case IP_VERSION(10, 1, 10):

drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
*
2222
*/
2323

24+
#ifndef AMDGPU_DOORBELL_H
25+
#define AMDGPU_DOORBELL_H
26+
2427
/*
2528
* GPU doorbell structures, functions & helpers
2629
*/
@@ -29,7 +32,9 @@ struct amdgpu_doorbell {
2932
resource_size_t base;
3033
resource_size_t size;
3134
u32 __iomem *ptr;
32-
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
35+
36+
/* Number of doorbells reserved for amdgpu kernel driver */
37+
u32 num_kernel_doorbells;
3338
};
3439

3540
/* Reserved doorbells for amdgpu (including multimedia).
@@ -306,3 +311,4 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
306311
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
307312
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
308313

314+
#endif

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ char *amdgpu_virtual_display;
157157
*/
158158
uint amdgpu_pp_feature_mask = 0xfff7bfff;
159159
uint amdgpu_force_long_training;
160-
int amdgpu_job_hang_limit;
161160
int amdgpu_lbpw = -1;
162161
int amdgpu_compute_multipipe = -1;
163162
int amdgpu_gpu_recovery = -1; /* auto */
@@ -520,13 +519,6 @@ MODULE_PARM_DESC(virtual_display,
520519
"Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
521520
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
522521

523-
/**
524-
* DOC: job_hang_limit (int)
525-
* Set how much time allow a job hang and not drop it. The default is 0.
526-
*/
527-
MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");
528-
module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
529-
530522
/**
531523
* DOC: lbpw (int)
532524
* Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
305305
ring->ring_obj = NULL;
306306
ring->use_doorbell = true;
307307
ring->doorbell_index = adev->doorbell_index.kiq;
308+
ring->vm_hub = AMDGPU_GFXHUB_0;
308309

309310
r = amdgpu_gfx_kiq_acquire(adev, ring);
310311
if (r)

0 commit comments

Comments
 (0)