Skip to content

Commit 9f7042f

Browse files
alexdeucher authored and gregkh committed
drm/amdkfd: drop struct kfd_cu_info
[ Upstream commit 0021d70 ] I think this was an abstraction back from when kfd supported both radeon and amdgpu. Since we just support amdgpu now, there is no more need for this and we can use the amdgpu structures directly. This also avoids having the kfd_cu_info structures on the stack when inlining which can blow up the stack. Cc: Arnd Bergmann <[email protected]> Acked-by: Arnd Bergmann <[email protected]> Reviewed-by: Felix Kuehling <[email protected]> Acked-by: Christian König <[email protected]> Signed-off-by: Alex Deucher <[email protected]> Stable-dep-of: 438b39a ("drm/amdkfd: pause autosuspend when creating pdd") Signed-off-by: Sasha Levin <[email protected]>
1 parent 798f21e commit 9f7042f

File tree

6 files changed

+48
-95
lines changed

6 files changed

+48
-95
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -467,28 +467,6 @@ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
467467
return 100;
468468
}
469469

470-
void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
471-
{
472-
struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
473-
474-
memset(cu_info, 0, sizeof(*cu_info));
475-
if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
476-
return;
477-
478-
cu_info->cu_active_number = acu_info.number;
479-
cu_info->cu_ao_mask = acu_info.ao_cu_mask;
480-
memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
481-
sizeof(cu_info->cu_bitmap));
482-
cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
483-
cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
484-
cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
485-
cu_info->simd_per_cu = acu_info.simd_per_cu;
486-
cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
487-
cu_info->wave_front_size = acu_info.wave_front_size;
488-
cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
489-
cu_info->lds_size = acu_info.lds_size;
490-
}
491-
492470
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
493471
struct amdgpu_device **dmabuf_adev,
494472
uint64_t *bo_size, void *metadata_buffer,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,6 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
235235
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
236236

237237
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
238-
void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev,
239-
struct kfd_cu_info *cu_info);
240238
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
241239
struct amdgpu_device **dmabuf_adev,
242240
uint64_t *bo_size, void *metadata_buffer,

drivers/gpu/drm/amd/amdkfd/kfd_crat.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,11 +2037,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
20372037
uint32_t proximity_domain)
20382038
{
20392039
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
2040+
struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
2041+
struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
20402042
struct crat_subtype_generic *sub_type_hdr;
20412043
struct kfd_local_mem_info local_mem_info;
20422044
struct kfd_topology_device *peer_dev;
20432045
struct crat_subtype_computeunit *cu;
2044-
struct kfd_cu_info cu_info;
20452046
int avail_size = *size;
20462047
uint32_t total_num_of_cu;
20472048
uint32_t nid = 0;
@@ -2085,21 +2086,20 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
20852086
cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
20862087
cu->proximity_domain = proximity_domain;
20872088

2088-
amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
2089-
cu->num_simd_per_cu = cu_info.simd_per_cu;
2090-
cu->num_simd_cores = cu_info.simd_per_cu *
2091-
(cu_info.cu_active_number / kdev->kfd->num_nodes);
2092-
cu->max_waves_simd = cu_info.max_waves_per_simd;
2089+
cu->num_simd_per_cu = cu_info->simd_per_cu;
2090+
cu->num_simd_cores = cu_info->simd_per_cu *
2091+
(cu_info->number / kdev->kfd->num_nodes);
2092+
cu->max_waves_simd = cu_info->max_waves_per_simd;
20932093

2094-
cu->wave_front_size = cu_info.wave_front_size;
2095-
cu->array_count = cu_info.num_shader_arrays_per_engine *
2096-
cu_info.num_shader_engines;
2097-
total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
2094+
cu->wave_front_size = cu_info->wave_front_size;
2095+
cu->array_count = gfx_info->max_sh_per_se *
2096+
gfx_info->max_shader_engines;
2097+
total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh);
20982098
cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
2099-
cu->num_cu_per_array = cu_info.num_cu_per_sh;
2100-
cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
2101-
cu->num_banks = cu_info.num_shader_engines;
2102-
cu->lds_size_in_kb = cu_info.lds_size;
2099+
cu->num_cu_per_array = gfx_info->max_cu_per_sh;
2100+
cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu;
2101+
cu->num_banks = gfx_info->max_shader_engines;
2102+
cu->lds_size_in_kb = cu_info->lds_size;
21032103

21042104
cu->hsa_capability = 0;
21052105

drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
9999
const uint32_t *cu_mask, uint32_t cu_mask_count,
100100
uint32_t *se_mask, uint32_t inst)
101101
{
102-
struct kfd_cu_info cu_info;
102+
struct amdgpu_cu_info *cu_info = &mm->dev->adev->gfx.cu_info;
103+
struct amdgpu_gfx_config *gfx_info = &mm->dev->adev->gfx.config;
103104
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
104105
bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
105106
uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
@@ -108,23 +109,22 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
108109
int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
109110
int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
110111

111-
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
112-
113-
cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
112+
cu_active_per_node = cu_info->number / mm->dev->kfd->num_nodes;
114113
if (cu_mask_count > cu_active_per_node)
115114
cu_mask_count = cu_active_per_node;
116115

117116
/* Exceeding these bounds corrupts the stack and indicates a coding error.
118117
* Returning with no CU's enabled will hang the queue, which should be
119118
* attention grabbing.
120119
*/
121-
if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
122-
pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
120+
if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
121+
pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
122+
gfx_info->max_shader_engines);
123123
return;
124124
}
125-
if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
125+
if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
126126
pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
127-
cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
127+
gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
128128
return;
129129
}
130130

@@ -142,10 +142,10 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
142142
* See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
143143
* See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info.
144144
*/
145-
for (se = 0; se < cu_info.num_shader_engines; se++)
146-
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
145+
for (se = 0; se < gfx_info->max_shader_engines; se++)
146+
for (sh = 0; sh < gfx_info->max_sh_per_se; sh++)
147147
cu_per_sh[se][sh] = hweight32(
148-
cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) *
148+
cu_info->bitmap[xcc_inst][se % 4][sh + (se / 4) *
149149
cu_bitmap_sh_mul]);
150150

151151
/* Symmetrically map cu_mask to all SEs & SHs:
@@ -184,13 +184,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
184184
*
185185
* First ensure all CUs are disabled, then enable user specified CUs.
186186
*/
187-
for (i = 0; i < cu_info.num_shader_engines; i++)
187+
for (i = 0; i < gfx_info->max_shader_engines; i++)
188188
se_mask[i] = 0;
189189

190190
i = inst;
191191
for (cu = 0; cu < 16; cu += cu_inc) {
192-
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
193-
for (se = 0; se < cu_info.num_shader_engines; se++) {
192+
for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) {
193+
for (se = 0; se < gfx_info->max_shader_engines; se++) {
194194
if (cu_per_sh[se][sh] > cu) {
195195
if (cu_mask[i / 32] & (en_mask << (i % 32)))
196196
se_mask[se] |= en_mask << (cu + sh * 16);

drivers/gpu/drm/amd/amdkfd/kfd_topology.c

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1537,7 +1537,6 @@ static int kfd_dev_create_p2p_links(void)
15371537
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
15381538
static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
15391539
struct kfd_gpu_cache_info *pcache_info,
1540-
struct kfd_cu_info *cu_info,
15411540
int cu_bitmask,
15421541
int cache_type, unsigned int cu_processor_id,
15431542
int cu_block)
@@ -1599,7 +1598,8 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
15991598
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
16001599
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
16011600
struct kfd_gpu_cache_info *pcache_info,
1602-
struct kfd_cu_info *cu_info,
1601+
struct amdgpu_cu_info *cu_info,
1602+
struct amdgpu_gfx_config *gfx_info,
16031603
int cache_type, unsigned int cu_processor_id,
16041604
struct kfd_node *knode)
16051605
{
@@ -1610,7 +1610,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
16101610

16111611
start = ffs(knode->xcc_mask) - 1;
16121612
end = start + NUM_XCC(knode->xcc_mask);
1613-
cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
1613+
cu_sibling_map_mask = cu_info->bitmap[start][0][0];
16141614
cu_sibling_map_mask &=
16151615
((1 << pcache_info[cache_type].num_cu_shared) - 1);
16161616
first_active_cu = ffs(cu_sibling_map_mask);
@@ -1646,15 +1646,15 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
16461646
k = 0;
16471647

16481648
for (xcc = start; xcc < end; xcc++) {
1649-
for (i = 0; i < cu_info->num_shader_engines; i++) {
1650-
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
1649+
for (i = 0; i < gfx_info->max_shader_engines; i++) {
1650+
for (j = 0; j < gfx_info->max_sh_per_se; j++) {
16511651
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
16521652
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
16531653
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
16541654
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
16551655
k += 4;
16561656

1657-
cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
1657+
cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4];
16581658
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
16591659
}
16601660
}
@@ -1679,16 +1679,14 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
16791679
unsigned int cu_processor_id;
16801680
int ret;
16811681
unsigned int num_cu_shared;
1682-
struct kfd_cu_info cu_info;
1683-
struct kfd_cu_info *pcu_info;
1682+
struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
1683+
struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
16841684
int gpu_processor_id;
16851685
struct kfd_cache_properties *props_ext;
16861686
int num_of_entries = 0;
16871687
int num_of_cache_types = 0;
16881688
struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
16891689

1690-
amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
1691-
pcu_info = &cu_info;
16921690

16931691
gpu_processor_id = dev->node_props.simd_id_base;
16941692

@@ -1715,12 +1713,12 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
17151713
cu_processor_id = gpu_processor_id;
17161714
if (pcache_info[ct].cache_level == 1) {
17171715
for (xcc = start; xcc < end; xcc++) {
1718-
for (i = 0; i < pcu_info->num_shader_engines; i++) {
1719-
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
1720-
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
1716+
for (i = 0; i < gfx_info->max_shader_engines; i++) {
1717+
for (j = 0; j < gfx_info->max_sh_per_se; j++) {
1718+
for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
17211719

1722-
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
1723-
pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
1720+
ret = fill_in_l1_pcache(&props_ext, pcache_info,
1721+
cu_info->bitmap[xcc][i % 4][j + i / 4], ct,
17241722
cu_processor_id, k);
17251723

17261724
if (ret < 0)
@@ -1733,17 +1731,17 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
17331731

17341732
/* Move to next CU block */
17351733
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
1736-
pcu_info->num_cu_per_sh) ?
1734+
gfx_info->max_cu_per_sh) ?
17371735
pcache_info[ct].num_cu_shared :
1738-
(pcu_info->num_cu_per_sh - k);
1736+
(gfx_info->max_cu_per_sh - k);
17391737
cu_processor_id += num_cu_shared;
17401738
}
17411739
}
17421740
}
17431741
}
17441742
} else {
17451743
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
1746-
pcu_info, ct, cu_processor_id, kdev);
1744+
cu_info, gfx_info, ct, cu_processor_id, kdev);
17471745

17481746
if (ret < 0)
17491747
break;
@@ -1922,10 +1920,11 @@ int kfd_topology_add_device(struct kfd_node *gpu)
19221920
{
19231921
uint32_t gpu_id;
19241922
struct kfd_topology_device *dev;
1925-
struct kfd_cu_info *cu_info;
19261923
int res = 0;
19271924
int i;
19281925
const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
1926+
struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
1927+
struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
19291928

19301929
gpu_id = kfd_generate_gpu_id(gpu);
19311930
if (gpu->xcp && !gpu->xcp->ddev) {
@@ -1963,12 +1962,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
19631962
/* Fill-in additional information that is not available in CRAT but
19641963
* needed for the topology
19651964
*/
1966-
cu_info = kzalloc(sizeof(struct kfd_cu_info), GFP_KERNEL);
1967-
if (!cu_info)
1968-
return -ENOMEM;
1969-
1970-
amdgpu_amdkfd_get_cu_info(dev->gpu->adev, cu_info);
1971-
19721965
for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
19731966
dev->node_props.name[i] = __tolower(asic_name[i]);
19741967
if (asic_name[i] == '\0')
@@ -1977,7 +1970,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
19771970
dev->node_props.name[i] = '\0';
19781971

19791972
dev->node_props.simd_arrays_per_engine =
1980-
cu_info->num_shader_arrays_per_engine;
1973+
gfx_info->max_sh_per_se;
19811974

19821975
dev->node_props.gfx_target_version =
19831976
gpu->kfd->device_info.gfx_target_version;
@@ -2058,7 +2051,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
20582051
*/
20592052
if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
20602053
dev->node_props.simd_count =
2061-
cu_info->simd_per_cu * cu_info->cu_active_number;
2054+
cu_info->simd_per_cu * cu_info->number;
20622055
dev->node_props.max_waves_per_simd = 10;
20632056
}
20642057

@@ -2085,8 +2078,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
20852078

20862079
kfd_notify_gpu_change(gpu_id, 1);
20872080

2088-
kfree(cu_info);
2089-
20902081
return 0;
20912082
}
20922083

drivers/gpu/drm/amd/include/kgd_kfd_interface.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,6 @@ struct kfd_vm_fault_info {
5757
bool prot_exec;
5858
};
5959

60-
struct kfd_cu_info {
61-
uint32_t num_shader_engines;
62-
uint32_t num_shader_arrays_per_engine;
63-
uint32_t num_cu_per_sh;
64-
uint32_t cu_active_number;
65-
uint32_t cu_ao_mask;
66-
uint32_t simd_per_cu;
67-
uint32_t max_waves_per_simd;
68-
uint32_t wave_front_size;
69-
uint32_t max_scratch_slots_per_cu;
70-
uint32_t lds_size;
71-
uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
72-
};
73-
7460
/* For getting GPU local memory information from KGD */
7561
struct kfd_local_mem_info {
7662
uint64_t local_mem_size_private;

0 commit comments

Comments (0)