Skip to content

Commit e4cb582

Browse files
committed
drm/xe: Count dwords before allocating
The bb allocation in emit_wa_job() is wrong in two ways. First, it's allocating either enough space for the 3DSTATE or a hardcoded 4k, depending on the engine: in the first case it doesn't account for the WAs, and in the latter it may not be sufficient. Secondly, it's using the size in bytes instead of the number of dwords, causing the buffer to be 4x bigger than needed: xe_bb_new() receives the number of dwords as its parameter, and its declaration also did not match its implementation. Lastly, reword the debug message, since it's not only about the LRC WAs anymore: it also includes the 3DSTATE for render. While it's unlikely this is causing any real issue, let's calculate the needed space and allocate just enough. Reviewed-by: Tvrtko Ursulin <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Lucas De Marchi <[email protected]>
1 parent 76650bc commit e4cb582

File tree

2 files changed

+25
-15
lines changed

2 files changed

+25
-15
lines changed

drivers/gpu/drm/xe/xe_bb.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ struct xe_gt;
1414
struct xe_exec_queue;
1515
struct xe_sched_job;
1616

17-
struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
17+
struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm);
1818
struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
1919
struct xe_bb *bb);
2020
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -189,16 +189,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
189189
long timeout;
190190
int count_rmw = 0;
191191
int count = 0;
192-
193-
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
194-
/* Big enough to emit all of the context's 3DSTATE */
195-
bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false);
196-
else
197-
/* Just pick a large BB size */
198-
bb = xe_bb_new(gt, SZ_4K, false);
199-
200-
if (IS_ERR(bb))
201-
return PTR_ERR(bb);
192+
size_t bb_len = 0;
202193

203194
/* count RMW registers as those will be handled separately */
204195
xa_for_each(&sr->xa, idx, entry) {
@@ -208,11 +199,30 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
208199
++count_rmw;
209200
}
210201

211-
if (count || count_rmw)
212-
xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
202+
if (count)
203+
bb_len += count * 2 + 1;
204+
205+
if (count_rmw)
206+
bb_len += count_rmw * 20 + 7;
207+
208+
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
209+
/*
210+
* Big enough to emit all of the context's 3DSTATE via
211+
* xe_lrc_emit_hwe_state_instructions()
212+
*/
213+
bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
214+
215+
xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
216+
217+
bb = xe_bb_new(gt, bb_len, false);
218+
if (IS_ERR(bb))
219+
return PTR_ERR(bb);
213220

214221
if (count) {
215-
/* emit single LRI with all non RMW regs */
222+
/*
223+
* Emit single LRI with all non RMW regs: 1 leading dw + 2dw per
224+
* reg + 1
225+
*/
216226

217227
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
218228

@@ -236,7 +246,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
236246
}
237247

238248
if (count_rmw) {
239-
/* emit MI_MATH for each RMW reg */
249+
/* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
240250

241251
xa_for_each(&sr->xa, idx, entry) {
242252
if (entry->reg.masked || entry->clr_bits == ~0)

0 commit comments

Comments
 (0)