Skip to content

Commit 1ec06c2

Browse files
skeelyamd authored and alexdeucher committed
drm/amdkfd: Account for SH/SE count when setting up cu masks.
On systems with multiple SH per SE, compute_static_thread_mgmt_se# is split into independent masks, one for each SH, in the upper and lower 16 bits. We need to detect this and apply CU masking to each SH. The cu_mask bits are assigned first to each SE, then to alternate SHs, then finally to higher CU IDs. This ensures that the maximum number of SPIs are engaged as early as possible while balancing CU assignment to each SH.

v2: Use max SH/SE rather than max SH in cu_per_sh.

v3: Fix comment blocks, ensure se_mask is initially zero-filled, and correctly assign se.sh.cu positions to unset bits in cu_mask.

Signed-off-by: Sean Keely <[email protected]>
Reviewed-by: Felix Kuehling <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent d035f84 commit 1ec06c2

File tree

2 files changed

+64
-21
lines changed

2 files changed

+64
-21
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -98,36 +98,78 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
9898
uint32_t *se_mask)
9999
{
100100
struct kfd_cu_info cu_info;
101-
uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0};
102-
int i, se, sh, cu = 0;
103-
101+
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
102+
int i, se, sh, cu;
104103
amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
105104

106105
if (cu_mask_count > cu_info.cu_active_number)
107106
cu_mask_count = cu_info.cu_active_number;
108107

108+
/* Exceeding these bounds corrupts the stack and indicates a coding error.
109+
* Returning with no CU's enabled will hang the queue, which should be
110+
* attention grabbing.
111+
*/
112+
if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
113+
pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
114+
return;
115+
}
116+
if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
117+
pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
118+
cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
119+
return;
120+
}
121+
/* Count active CUs per SH.
122+
*
123+
* Some CUs in an SH may be disabled. HW expects disabled CUs to be
124+
* represented in the high bits of each SH's enable mask (the upper and lower
125+
* 16 bits of se_mask) and will take care of the actual distribution of
126+
* disabled CUs within each SH automatically.
127+
* Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
128+
*
129+
* See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
130+
*/
109131
for (se = 0; se < cu_info.num_shader_engines; se++)
110132
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
111-
cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
112-
113-
/* Symmetrically map cu_mask to all SEs:
114-
* cu_mask[0] bit0 -> se_mask[0] bit0;
115-
* cu_mask[0] bit1 -> se_mask[1] bit0;
116-
* ... (if # SE is 4)
117-
* cu_mask[0] bit4 -> se_mask[0] bit1;
133+
cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
134+
135+
/* Symmetrically map cu_mask to all SEs & SHs:
136+
* se_mask programs up to 2 SH in the upper and lower 16 bits.
137+
*
138+
* Examples
139+
* Assuming 1 SH/SE, 4 SEs:
140+
* cu_mask[0] bit0 -> se_mask[0] bit0
141+
* cu_mask[0] bit1 -> se_mask[1] bit0
142+
* ...
143+
* cu_mask[0] bit4 -> se_mask[0] bit1
144+
* ...
145+
*
146+
* Assuming 2 SH/SE, 4 SEs
147+
* cu_mask[0] bit0 -> se_mask[0] bit0 (SE0,SH0,CU0)
148+
* cu_mask[0] bit1 -> se_mask[1] bit0 (SE1,SH0,CU0)
149+
* ...
150+
* cu_mask[0] bit4 -> se_mask[0] bit16 (SE0,SH1,CU0)
151+
* cu_mask[0] bit5 -> se_mask[1] bit16 (SE1,SH1,CU0)
152+
* ...
153+
* cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
118154
* ...
155+
*
156+
* First ensure all CUs are disabled, then enable user specified CUs.
119157
*/
120-
se = 0;
121-
for (i = 0; i < cu_mask_count; i++) {
122-
if (cu_mask[i / 32] & (1 << (i % 32)))
123-
se_mask[se] |= 1 << cu;
124-
125-
do {
126-
se++;
127-
if (se == cu_info.num_shader_engines) {
128-
se = 0;
129-
cu++;
158+
for (i = 0; i < cu_info.num_shader_engines; i++)
159+
se_mask[i] = 0;
160+
161+
i = 0;
162+
for (cu = 0; cu < 16; cu++) {
163+
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
164+
for (se = 0; se < cu_info.num_shader_engines; se++) {
165+
if (cu_per_sh[se][sh] > cu) {
166+
if (cu_mask[i / 32] & (1 << (i % 32)))
167+
se_mask[se] |= 1 << (cu + sh * 16);
168+
i++;
169+
if (i == cu_mask_count)
170+
return;
171+
}
130172
}
131-
} while (cu >= cu_per_se[se] && cu < 32);
173+
}
132174
}
133175
}

drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "kfd_priv.h"
2828

2929
#define KFD_MAX_NUM_SE 8
30+
#define KFD_MAX_NUM_SH_PER_SE 2
3031

3132
/**
3233
* struct mqd_manager

0 commit comments

Comments
 (0)