
Commit e72da82

Merge tag 'drm-fixes-2024-12-14' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "This is the weekly fixes pull for drm. Just has i915, xe and amdgpu
  changes in it. Nothing too major in here:

  i915:
   - Don't use indexed register writes needlessly [dsb]
   - Stop using non-posted DSB writes for legacy LUT [color]
   - Fix NULL pointer dereference in capture_engine
   - Fix memory leak by correcting cache object name in error handler

  xe:
   - Fix a KUNIT test error message (Mirsad Todorovac)
   - Fix an invalidation fence PM ref leak (Daniele)
   - Fix a register pool UAF (Lucas)

  amdgpu:
   - ISP hw init fix
   - SR-IOV fixes
   - Fix contiguous VRAM mapping for UVD on older GPUs
   - Fix some regressions due to drm scheduler changes
   - Workload profile fixes
   - Cleaner shader fix

  amdkfd:
   - Fix DMA map direction for migration
   - Fix a potential null pointer dereference
   - Cacheline size fixes
   - Runtime PM fix"

* tag 'drm-fixes-2024-12-14' of https://gitlab.freedesktop.org/drm/kernel:
  drm/xe/reg_sr: Remove register pool
  drm/xe: Call invalidation_fence_fini for PT inval fences in error state
  drm/xe: fix the ERR_PTR() returned on failure to allocate tiny pt
  drm/amdkfd: pause autosuspend when creating pdd
  drm/amdgpu: fix when the cleaner shader is emitted
  drm/amdgpu: Fix ISP HW init issue
  drm/amdkfd: hard-code MALL cacheline size for gfx11, gfx12
  drm/amdkfd: hard-code cacheline size for gfx11
  drm/amdkfd: Dereference null return value
  drm/i915: Fix memory leak by correcting cache object name in error handler
  drm/i915: Fix NULL pointer dereference in capture_engine
  drm/i915/color: Stop using non-posted DSB writes for legacy LUT
  drm/i915/dsb: Don't use indexed register writes needlessly
  drm/amdkfd: Correct the migration DMA map direction
  drm/amd/pm: Set SMU v13.0.7 default workload type
  drm/amd/pm: Initialize power profile mode
  amdgpu/uvd: get ring reference from rq scheduler
  drm/amdgpu: fix UVD contiguous CS mapping problem
  drm/amdgpu: use sjt mec fw on gfx943 for sriov
  Revert "drm/amdgpu: Fix ISP hw init issue"
2 parents 974acf9 + d172ea6 commit e72da82

25 files changed (+200, -125 lines)

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

Lines changed: 11 additions & 6 deletions
@@ -1801,13 +1801,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
 		return -EINVAL;
 
+	/* Make sure VRAM is allocated contigiously */
 	(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-	amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
-	for (i = 0; i < (*bo)->placement.num_placement; i++)
-		(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
-	r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
-	if (r)
-		return r;
+	if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
+	    !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+
+		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+		for (i = 0; i < (*bo)->placement.num_placement; i++)
+			(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
+		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
+		if (r)
+			return r;
+	}
 
 	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
 }

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 1 addition & 1 deletion
@@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = {
 	"LAST",
 };
 
-#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0)
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
 /*
  * Default init level where all blocks are expected to be initialized. This is
  * the level of initialization expected by default and also after a full reset
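Editorial note (not part of the commit): GENMASK(h, l) builds a mask with bits l through h set, inclusive, so a mask that covers N block types must use N - 1 as its high bit. A minimal standalone sketch of that semantics follows; the macro body mirrors the shape of the kernel's GENMASK and the block count is a made-up illustrative value, not the real AMD_IP_BLOCK_TYPE_NUM.

#include <stdio.h>

/* Illustrative stand-in with the same shape as the kernel's GENMASK_ULL:
 * bits l..h inclusive are set in a 64-bit word.
 */
#define GENMASK_ULL(h, l) \
        (((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

int main(void)
{
        const unsigned int num_blocks = 12; /* hypothetical block count */

        /* Correct: high bit is num_blocks - 1, mask has num_blocks bits. */
        printf("GENMASK(num - 1, 0) = %#llx\n", GENMASK_ULL(num_blocks - 1, 0)); /* 0xfff */
        /* Off by one: high bit num_blocks sets one extra bit. */
        printf("GENMASK(num, 0)     = %#llx\n", GENMASK_ULL(num_blocks, 0));     /* 0x1fff */
        return 0;
}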

drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c

Lines changed: 2 additions & 0 deletions
@@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
 	for (i = 0; i < abo->placement.num_placement; ++i) {
 		abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
 		abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+		if (abo->placements[i].mem_type == TTM_PL_VRAM)
+			abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
 	}
 }

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

Lines changed: 7 additions & 6 deletions
@@ -674,12 +674,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
 		ring->funcs->emit_wreg;
 
-	if (adev->gfx.enable_cleaner_shader &&
-	    ring->funcs->emit_cleaner_shader &&
-	    job->enforce_isolation)
-		ring->funcs->emit_cleaner_shader(ring);
-
-	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
+	    !(job->enforce_isolation && !job->vmid))
 		return 0;
 
 	amdgpu_ring_ib_begin(ring);
@@ -690,6 +686,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	if (need_pipe_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
 
+	if (adev->gfx.enable_cleaner_shader &&
+	    ring->funcs->emit_cleaner_shader &&
+	    job->enforce_isolation)
+		ring->funcs->emit_cleaner_shader(ring);
+
 	if (vm_flush_needed) {
 		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
 		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);

drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

Lines changed: 8 additions & 2 deletions
@@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
 MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
 
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -574,8 +576,12 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
 {
 	int err;
 
-	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
-				   "amdgpu/%s_mec.bin", chip_name);
+	if (amdgpu_sriov_vf(adev))
+		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+					   "amdgpu/%s_sjt_mec.bin", chip_name);
+	else
+		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+					   "amdgpu/%s_mec.bin", chip_name);
 	if (err)
 		goto out;
 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);

drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c

Lines changed: 1 addition & 1 deletion
@@ -1288,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 					   struct amdgpu_job *job,
 					   struct amdgpu_ib *ib)
 {
-	struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+	struct amdgpu_ring *ring = amdgpu_job_ring(job);
 	unsigned i;
 
 	/* No patching necessary for the first instance */
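Editorial note (not part of the commit): per the "amdgpu/uvd: get ring reference from rq scheduler" patch in this pull, the ring is now resolved through the job's run queue rather than job->base.sched, which can be unreliable after the drm scheduler changes. The helper used above is assumed to look roughly like the sketch below; treat it as an illustration and check amdgpu_job.h for the real definition.

/* Assumed shape of the helper referenced in the hunk above. */
static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)
{
        /* Resolve the ring from the entity's run-queue scheduler instead of
         * job->base.sched, which may not yet point at the chosen ring here.
         */
        return to_amdgpu_ring(job->base.entity->rq->sched);
}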

drivers/gpu/drm/amd/amdkfd/kfd_crat.c

Lines changed: 21 additions & 3 deletions
@@ -1423,6 +1423,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
 
 
 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+						    bool cache_line_size_missing,
 						    struct kfd_gpu_cache_info *pcache_info)
 {
 	struct amdgpu_device *adev = kdev->adev;
@@ -1437,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
 		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
+		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+			pcache_info[i].cache_line_size = 128;
 		i++;
 	}
 	/* Scalar L1 Instruction Cache per SQC */
@@ -1449,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
 		pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
+		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+			pcache_info[i].cache_line_size = 128;
 		i++;
 	}
 	/* Scalar L1 Data Cache per SQC */
@@ -1460,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
 		pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
+		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+			pcache_info[i].cache_line_size = 64;
 		i++;
 	}
 	/* GL1 Data Cache per SA */
@@ -1472,7 +1479,8 @@
 					CRAT_CACHE_FLAGS_DATA_CACHE |
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
-		pcache_info[i].cache_line_size = 0;
+		if (cache_line_size_missing)
+			pcache_info[i].cache_line_size = 128;
 		i++;
 	}
 	/* L2 Data Cache per GPU (Total Tex Cache) */
@@ -1484,6 +1492,8 @@
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
 		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
+		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+			pcache_info[i].cache_line_size = 128;
 		i++;
 	}
 	/* L3 Data Cache per GPU */
@@ -1494,7 +1504,7 @@
 					CRAT_CACHE_FLAGS_DATA_CACHE |
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
-		pcache_info[i].cache_line_size = 0;
+		pcache_info[i].cache_line_size = 64;
 		i++;
 	}
 	return i;
@@ -1569,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
 {
 	int num_of_cache_types = 0;
+	bool cache_line_size_missing = false;
 
 	switch (kdev->adev->asic_type) {
 	case CHIP_KAVERI:
@@ -1692,10 +1703,17 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
 	case IP_VERSION(11, 5, 0):
 	case IP_VERSION(11, 5, 1):
 	case IP_VERSION(11, 5, 2):
+		/* Cacheline size not available in IP discovery for gc11.
+		 * kfd_fill_gpu_cache_info_from_gfx_config to hard code it
+		 */
+		cache_line_size_missing = true;
+		fallthrough;
 	case IP_VERSION(12, 0, 0):
 	case IP_VERSION(12, 0, 1):
 		num_of_cache_types =
-			kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
+			kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
+								cache_line_size_missing,
+								*pcache_info);
 		break;
 	default:
 		*pcache_info = dummy_cache_info;

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Lines changed: 15 additions & 0 deletions
@@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
 	if (!down_read_trylock(&adev->reset_domain->sem))
 		return -EIO;
 
+	if (!pdd->proc_ctx_cpu_ptr) {
+		r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+				AMDGPU_MES_PROC_CTX_SIZE,
+				&pdd->proc_ctx_bo,
+				&pdd->proc_ctx_gpu_addr,
+				&pdd->proc_ctx_cpu_ptr,
+				false);
+		if (r) {
+			dev_err(adev->dev,
+				"failed to allocate process context bo\n");
+			return r;
+		}
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
 	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
 	queue_input.process_id = qpd->pqm->process->pasid;
 	queue_input.page_table_base_addr = qpd->page_table_base;

drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

Lines changed: 2 additions & 2 deletions
@@ -306,7 +306,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
 		spage = migrate_pfn_to_page(migrate->src[i]);
 		if (spage && !is_zone_device_page(spage)) {
 			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
-					      DMA_TO_DEVICE);
+					      DMA_BIDIRECTIONAL);
 			r = dma_mapping_error(dev, src[i]);
 			if (r) {
 				dev_err(dev, "%s: fail %d dma_map_page\n",
@@ -629,7 +629,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 			goto out_oom;
 		}
 
-		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
 		r = dma_mapping_error(dev, dst[i]);
 		if (r) {
 			dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
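Editorial note (not part of the commit): both migration copy paths now map the system page with DMA_BIDIRECTIONAL instead of a single-direction mapping. A minimal, generic sketch of the dma_map_page()/dma_mapping_error() pattern used above, assuming only a generic struct device and struct page; the helper name is made up for illustration.

#include <linux/dma-mapping.h>

/* Hypothetical helper: map one page so the device may both read and
 * write it, matching the direction used in the migration hunks above.
 */
static dma_addr_t map_page_for_migration(struct device *dev, struct page *page)
{
        dma_addr_t addr = dma_map_page(dev, page, 0, PAGE_SIZE,
                                       DMA_BIDIRECTIONAL);

        if (dma_mapping_error(dev, addr))
                return DMA_MAPPING_ERROR;
        return addr;
}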

drivers/gpu/drm/amd/amdkfd/kfd_process.c

Lines changed: 2 additions & 21 deletions
@@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 
 		kfd_free_process_doorbells(pdd->dev->kfd, pdd);
 
-		if (pdd->dev->kfd->shared_resources.enable_mes)
+		if (pdd->dev->kfd->shared_resources.enable_mes &&
+		    pdd->proc_ctx_cpu_ptr)
 			amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
 						   &pdd->proc_ctx_bo);
 		/*
@@ -1608,7 +1609,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 							struct kfd_process *p)
 {
 	struct kfd_process_device *pdd = NULL;
-	int retval = 0;
 
 	if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
 		return NULL;
@@ -1632,21 +1632,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
 
-	if (dev->kfd->shared_resources.enable_mes) {
-		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
-						AMDGPU_MES_PROC_CTX_SIZE,
-						&pdd->proc_ctx_bo,
-						&pdd->proc_ctx_gpu_addr,
-						&pdd->proc_ctx_cpu_ptr,
-						false);
-		if (retval) {
-			dev_err(dev->adev->dev,
-				"failed to allocate process context bo\n");
-			goto err_free_pdd;
-		}
-		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
-	}
-
 	p->pdds[p->n_pdds++] = pdd;
 	if (kfd_dbg_is_per_vmid_supported(pdd->dev))
 		pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1658,10 +1643,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 	idr_init(&pdd->alloc_idr);
 
 	return pdd;
-
-err_free_pdd:
-	kfree(pdd);
-	return NULL;
 }
 
 /**
