Skip to content

Commit aeeb82f

Browse files
committed
Merge tag 'amd-drm-fixes-5.16-2021-12-29' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-5.16-2021-12-29: amdgpu: - Fencing fix - XGMI fix - VCN regression fix - IP discovery regression fixes - Fix runpm documentation - Suspend/resume fixes - Yellow Carp display fixes - MCLK power management fix Signed-off-by: Dave Airlie <[email protected]> From: Alex Deucher <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents 05097b1 + ee2698c commit aeeb82f

File tree

23 files changed

+300
-98
lines changed

23 files changed

+300
-98
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,6 +3166,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
31663166
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
31673167
{
31683168
switch (asic_type) {
3169+
#ifdef CONFIG_DRM_AMDGPU_SI
3170+
case CHIP_HAINAN:
3171+
#endif
3172+
case CHIP_TOPAZ:
3173+
/* chips with no display hardware */
3174+
return false;
31693175
#if defined(CONFIG_DRM_AMD_DC)
31703176
case CHIP_TAHITI:
31713177
case CHIP_PITCAIRN:
@@ -4461,7 +4467,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
44614467
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
44624468
struct amdgpu_reset_context *reset_context)
44634469
{
4464-
int i, j, r = 0;
4470+
int i, r = 0;
44654471
struct amdgpu_job *job = NULL;
44664472
bool need_full_reset =
44674473
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
@@ -4483,15 +4489,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
44834489

44844490
/*clear job fence from fence drv to avoid force_completion
44854491
*leave NULL and vm flush fence in fence drv */
4486-
for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) {
4487-
struct dma_fence *old, **ptr;
4492+
amdgpu_fence_driver_clear_job_fences(ring);
44884493

4489-
ptr = &ring->fence_drv.fences[j];
4490-
old = rcu_dereference_protected(*ptr, 1);
4491-
if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) {
4492-
RCU_INIT_POINTER(*ptr, NULL);
4493-
}
4494-
}
44954494
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
44964495
amdgpu_fence_driver_force_completion(ring);
44974496
}

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

Lines changed: 54 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -526,39 +526,71 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
526526
}
527527
}
528528

529+
union gc_info {
530+
struct gc_info_v1_0 v1;
531+
struct gc_info_v2_0 v2;
532+
};
533+
529534
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
530535
{
531536
struct binary_header *bhdr;
532-
struct gc_info_v1_0 *gc_info;
537+
union gc_info *gc_info;
533538

534539
if (!adev->mman.discovery_bin) {
535540
DRM_ERROR("ip discovery uninitialized\n");
536541
return -EINVAL;
537542
}
538543

539544
bhdr = (struct binary_header *)adev->mman.discovery_bin;
540-
gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
545+
gc_info = (union gc_info *)(adev->mman.discovery_bin +
541546
le16_to_cpu(bhdr->table_list[GC].offset));
542-
543-
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
544-
adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
545-
le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
546-
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
547-
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
548-
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
549-
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
550-
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
551-
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
552-
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
553-
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
554-
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
555-
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
556-
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
557-
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
558-
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
559-
le32_to_cpu(gc_info->gc_num_sa_per_se);
560-
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
561-
547+
switch (gc_info->v1.header.version_major) {
548+
case 1:
549+
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
550+
adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
551+
le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
552+
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
553+
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
554+
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
555+
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
556+
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
557+
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
558+
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
559+
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
560+
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
561+
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
562+
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
563+
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
564+
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
565+
le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
566+
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
567+
break;
568+
case 2:
569+
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
570+
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
571+
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
572+
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
573+
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
574+
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
575+
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
576+
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
577+
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
578+
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
579+
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
580+
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
581+
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
582+
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
583+
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
584+
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
585+
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
586+
break;
587+
default:
588+
dev_err(adev->dev,
589+
"Unhandled GC info table %d.%d\n",
590+
gc_info->v1.header.version_major,
591+
gc_info->v1.header.version_minor);
592+
return -EINVAL;
593+
}
562594
return 0;
563595
}
564596

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -328,10 +328,11 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
328328

329329
/**
330330
* DOC: runpm (int)
331-
* Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down
332-
* the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.
331+
* Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down
332+
* the dGPUs when they are idle if supported. The default is -1 (auto enable).
333+
* Setting the value to 0 disables this functionality.
333334
*/
334-
MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)");
335+
MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)");
335336
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
336337

337338
/**
@@ -2153,7 +2154,10 @@ static int amdgpu_pmops_suspend(struct device *dev)
21532154
adev->in_s3 = true;
21542155
r = amdgpu_device_suspend(drm_dev, true);
21552156
adev->in_s3 = false;
2156-
2157+
if (r)
2158+
return r;
2159+
if (!adev->in_s0ix)
2160+
r = amdgpu_asic_reset(adev);
21572161
return r;
21582162
}
21592163

@@ -2234,12 +2238,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
22342238
if (amdgpu_device_supports_px(drm_dev))
22352239
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
22362240

2241+
/*
2242+
* By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some
2243+
* proper cleanups and put itself into a state ready for PNP. That
2244+
* can address some random resuming failure observed on BOCO capable
2245+
* platforms.
2246+
* TODO: this may be also needed for PX capable platform.
2247+
*/
2248+
if (amdgpu_device_supports_boco(drm_dev))
2249+
adev->mp1_state = PP_MP1_STATE_UNLOAD;
2250+
22372251
ret = amdgpu_device_suspend(drm_dev, false);
22382252
if (ret) {
22392253
adev->in_runpm = false;
2254+
if (amdgpu_device_supports_boco(drm_dev))
2255+
adev->mp1_state = PP_MP1_STATE_NONE;
22402256
return ret;
22412257
}
22422258

2259+
if (amdgpu_device_supports_boco(drm_dev))
2260+
adev->mp1_state = PP_MP1_STATE_NONE;
2261+
22432262
if (amdgpu_device_supports_px(drm_dev)) {
22442263
/* Only need to handle PCI state in the driver for ATPX
22452264
* PCI core handles it for _PR3.

drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

Lines changed: 87 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,13 @@ void amdgpu_fence_slab_fini(void)
7777
* Cast helper
7878
*/
7979
static const struct dma_fence_ops amdgpu_fence_ops;
80+
static const struct dma_fence_ops amdgpu_job_fence_ops;
8081
static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
8182
{
8283
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
8384

84-
if (__f->base.ops == &amdgpu_fence_ops)
85+
if (__f->base.ops == &amdgpu_fence_ops ||
86+
__f->base.ops == &amdgpu_job_fence_ops)
8587
return __f;
8688

8789
return NULL;
@@ -158,19 +160,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
158160
}
159161

160162
seq = ++ring->fence_drv.sync_seq;
161-
if (job != NULL && job->job_run_counter) {
163+
if (job && job->job_run_counter) {
162164
/* reinit seq for resubmitted jobs */
163165
fence->seqno = seq;
164166
} else {
165-
dma_fence_init(fence, &amdgpu_fence_ops,
166-
&ring->fence_drv.lock,
167-
adev->fence_context + ring->idx,
168-
seq);
169-
}
170-
171-
if (job != NULL) {
172-
/* mark this fence has a parent job */
173-
set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags);
167+
if (job)
168+
dma_fence_init(fence, &amdgpu_job_fence_ops,
169+
&ring->fence_drv.lock,
170+
adev->fence_context + ring->idx, seq);
171+
else
172+
dma_fence_init(fence, &amdgpu_fence_ops,
173+
&ring->fence_drv.lock,
174+
adev->fence_context + ring->idx, seq);
174175
}
175176

176177
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
@@ -620,6 +621,25 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
620621
}
621622
}
622623

624+
/**
625+
* amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
626+
*
627+
* @ring: fence of the ring to be cleared
628+
*
629+
*/
630+
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
631+
{
632+
int i;
633+
struct dma_fence *old, **ptr;
634+
635+
for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
636+
ptr = &ring->fence_drv.fences[i];
637+
old = rcu_dereference_protected(*ptr, 1);
638+
if (old && old->ops == &amdgpu_job_fence_ops)
639+
RCU_INIT_POINTER(*ptr, NULL);
640+
}
641+
}
642+
623643
/**
624644
* amdgpu_fence_driver_force_completion - force signal latest fence of ring
625645
*
@@ -643,16 +663,14 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
643663

644664
static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
645665
{
646-
struct amdgpu_ring *ring;
666+
return (const char *)to_amdgpu_fence(f)->ring->name;
667+
}
647668

648-
if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
649-
struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
669+
static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
670+
{
671+
struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
650672

651-
ring = to_amdgpu_ring(job->base.sched);
652-
} else {
653-
ring = to_amdgpu_fence(f)->ring;
654-
}
655-
return (const char *)ring->name;
673+
return (const char *)to_amdgpu_ring(job->base.sched)->name;
656674
}
657675

658676
/**
@@ -665,18 +683,25 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
665683
*/
666684
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
667685
{
668-
struct amdgpu_ring *ring;
686+
if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
687+
amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
669688

670-
if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
671-
struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
689+
return true;
690+
}
672691

673-
ring = to_amdgpu_ring(job->base.sched);
674-
} else {
675-
ring = to_amdgpu_fence(f)->ring;
676-
}
692+
/**
693+
* amdgpu_job_fence_enable_signaling - enable signalling on job fence
694+
* @f: fence
695+
*
696+
* This is the similar function with amdgpu_fence_enable_signaling above, it
697+
* only handles the job embedded fence.
698+
*/
699+
static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
700+
{
701+
struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
677702

678-
if (!timer_pending(&ring->fence_drv.fallback_timer))
679-
amdgpu_fence_schedule_fallback(ring);
703+
if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
704+
amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
680705

681706
return true;
682707
}
@@ -692,19 +717,23 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
692717
{
693718
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
694719

695-
if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
696-
/* free job if fence has a parent job */
697-
struct amdgpu_job *job;
698-
699-
job = container_of(f, struct amdgpu_job, hw_fence);
700-
kfree(job);
701-
} else {
702720
/* free fence_slab if it's separated fence*/
703-
struct amdgpu_fence *fence;
721+
kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f));
722+
}
704723

705-
fence = to_amdgpu_fence(f);
706-
kmem_cache_free(amdgpu_fence_slab, fence);
707-
}
724+
/**
725+
* amdgpu_job_fence_free - free up the job with embedded fence
726+
*
727+
* @rcu: RCU callback head
728+
*
729+
* Free up the job with embedded fence after the RCU grace period.
730+
*/
731+
static void amdgpu_job_fence_free(struct rcu_head *rcu)
732+
{
733+
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
734+
735+
/* free job if fence has a parent job */
736+
kfree(container_of(f, struct amdgpu_job, hw_fence));
708737
}
709738

710739
/**
@@ -720,13 +749,32 @@ static void amdgpu_fence_release(struct dma_fence *f)
720749
call_rcu(&f->rcu, amdgpu_fence_free);
721750
}
722751

752+
/**
753+
* amdgpu_job_fence_release - callback that job embedded fence can be freed
754+
*
755+
* @f: fence
756+
*
757+
* This is the similar function with amdgpu_fence_release above, it
758+
* only handles the job embedded fence.
759+
*/
760+
static void amdgpu_job_fence_release(struct dma_fence *f)
761+
{
762+
call_rcu(&f->rcu, amdgpu_job_fence_free);
763+
}
764+
723765
static const struct dma_fence_ops amdgpu_fence_ops = {
724766
.get_driver_name = amdgpu_fence_get_driver_name,
725767
.get_timeline_name = amdgpu_fence_get_timeline_name,
726768
.enable_signaling = amdgpu_fence_enable_signaling,
727769
.release = amdgpu_fence_release,
728770
};
729771

772+
static const struct dma_fence_ops amdgpu_job_fence_ops = {
773+
.get_driver_name = amdgpu_fence_get_driver_name,
774+
.get_timeline_name = amdgpu_job_fence_get_timeline_name,
775+
.enable_signaling = amdgpu_job_fence_enable_signaling,
776+
.release = amdgpu_job_fence_release,
777+
};
730778

731779
/*
732780
* Fence debugfs

0 commit comments

Comments
 (0)