Skip to content

Commit eca13f3

Browse files
drm/amdgpu: use the last IB as gang leader v2
It turned out that the gang leader is not the last IB specified, but rather the last job allocated. This is unfortunate and not very intuitive for the CS interface, so try to fix this.

Signed-off-by: Christian König <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Tested-by: Timur Kristóf <[email protected]>
Acked-by: Timur Kristóf <[email protected]>
Reviewed-by: Alex Deucher <[email protected]>
Fixes: 4624459 ("drm/amdgpu: add gang submit frontend v6")
1 parent e17a025 commit eca13f3

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
109109
return r;
110110

111111
++(num_ibs[r]);
112+
p->gang_leader_idx = r;
112113
return 0;
113114
}
114115

@@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
300301
if (ret)
301302
goto free_all_kdata;
302303
}
303-
p->gang_leader = p->jobs[p->gang_size - 1];
304+
p->gang_leader = p->jobs[p->gang_leader_idx];
304305

305306
if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
306307
ret = -ECANCELED;
@@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
11941195
return r;
11951196
}
11961197

1197-
for (i = 0; i < p->gang_size - 1; ++i) {
1198+
for (i = 0; i < p->gang_size; ++i) {
1199+
if (p->jobs[i] == leader)
1200+
continue;
1201+
11981202
r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
11991203
if (r)
12001204
return r;
12011205
}
12021206

1203-
r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
1207+
r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
12041208
if (r && r != -ERESTARTSYS)
12051209
DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
1206-
12071210
return r;
12081211
}
12091212

@@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
12371240
for (i = 0; i < p->gang_size; ++i)
12381241
drm_sched_job_arm(&p->jobs[i]->base);
12391242

1240-
for (i = 0; i < (p->gang_size - 1); ++i) {
1243+
for (i = 0; i < p->gang_size; ++i) {
12411244
struct dma_fence *fence;
12421245

1246+
if (p->jobs[i] == leader)
1247+
continue;
1248+
12431249
fence = &p->jobs[i]->base.s_fence->scheduled;
12441250
r = amdgpu_sync_fence(&leader->sync, fence);
12451251
if (r)
@@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
12751281
list_for_each_entry(e, &p->validated, tv.head) {
12761282

12771283
/* Everybody except for the gang leader uses READ */
1278-
for (i = 0; i < (p->gang_size - 1); ++i) {
1284+
for (i = 0; i < p->gang_size; ++i) {
1285+
if (p->jobs[i] == leader)
1286+
continue;
1287+
12791288
dma_resv_add_fence(e->tv.bo->base.resv,
12801289
&p->jobs[i]->base.s_fence->finished,
12811290
DMA_RESV_USAGE_READ);
@@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
12851294
e->tv.num_shared = 0;
12861295
}
12871296

1288-
seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
1297+
seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
12891298
p->fence);
12901299
amdgpu_cs_post_dependencies(p);
12911300

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ struct amdgpu_cs_parser {
5454

5555
/* scheduler job objects */
5656
unsigned int gang_size;
57+
unsigned int gang_leader_idx;
5758
struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
5859
struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE];
5960
struct amdgpu_job *gang_leader;

0 commit comments

Comments
 (0)