Skip to content

Commit 3f4c175

Browse files
jiadozhu authored and alexdeucher committed
drm/amdgpu: MCBP based on DRM scheduler (v9)
Trigger Mid-Command Buffer Preemption according to the priority of the software rings and the hw fence signalling condition. The muxer saves the locations of the indirect buffer frames from the software ring together with the fence sequence number in its fifo queue, and pops out those records when the fences are signalled. The locations are used to resubmit packages in preemption scenarios by copying the chunks from the software ring. v2: Update comment style. v3: Fix conflict caused by previous modifications. v4: Remove unnecessary prints. v5: Fix corner cases for resubmission cases. v6: Refactor functions for resubmission, calling fence_process in irq handler. v7: Solve conflict for removing amdgpu_sw_ring.c. v8: Add time threshold to judge if preemption request is needed. v9: Correct comment spelling. Set fence emit timestamp before rcu assignment. Cc: Christian Koenig <[email protected]> Cc: Luben Tuikov <[email protected]> Cc: Andrey Grodzovsky <[email protected]> Cc: Michel Dänzer <[email protected]> Signed-off-by: Jiadong.Zhu <[email protected]> Acked-by: Luben Tuikov <[email protected]> Acked-by: Huang Rui <[email protected]> Acked-by: Christian König <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent be25455 commit 3f4c175

File tree

8 files changed

+422
-43
lines changed

8 files changed

+422
-43
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ struct amdgpu_fence {
5555

5656
/* RB, DMA, etc. */
5757
struct amdgpu_ring *ring;
58+
ktime_t start_timestamp;
5859
};
5960

6061
static struct kmem_cache *amdgpu_fence_slab;
@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
199200
}
200201
}
201202

203+
to_amdgpu_fence(fence)->start_timestamp = ktime_get();
204+
202205
/* This function can't be called concurrently anyway, otherwise
203206
* emitting the fence would mess up the hardware ring buffer.
204207
*/
@@ -406,6 +409,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
406409
return lower_32_bits(emitted);
407410
}
408411

412+
/**
413+
* amdgpu_fence_last_unsignaled_time_us - time elapsed since the oldest pending fence started
414+
* @ring: ring the fence is associated with
415+
*
416+
* Find the earliest fence that is still unsignaled and calculate the time
417+
* delta between that fence's start_timestamp and now. The fence is looked up
* in the fences[] slot that follows fence_drv.last_seq.
*
* Return: the elapsed time in microseconds, or 0 when last_seq equals
* sync_seq or when the looked-up fence slot is empty.
418+
*/
419+
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
420+
{
421+
struct amdgpu_fence_driver *drv = &ring->fence_drv;
422+
struct dma_fence *fence;
423+
uint32_t last_seq, sync_seq;
424+
/* Snapshot both sequence counters of this ring's fence driver. */
425+
last_seq = atomic_read(&ring->fence_drv.last_seq);
426+
sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
/* Equal counters: presumably no fence is outstanding, so there is nothing to time. */
427+
if (last_seq == sync_seq)
428+
return 0;
429+
/* Step to the sequence number after last_seq and wrap it into fences[]. */
430+
++last_seq;
431+
last_seq &= drv->num_fences_mask;
432+
fence = drv->fences[last_seq];
/* Empty slot: no fence to measure against. */
433+
if (!fence)
434+
return 0;
435+
/* Microseconds between the fence's start_timestamp and the current time. */
436+
return ktime_us_delta(ktime_get(),
437+
to_amdgpu_fence(fence)->start_timestamp);
438+
}
439+
440+
/**
441+
* amdgpu_fence_update_start_timestamp - update the start timestamp of a fence
442+
* @ring: ring the fence is associated with
443+
* @seq: sequence number of the fence to update
444+
* @timestamp: the new start timestamp to store in the fence
445+
*
446+
* This function is called when the fence and its related IB are about to be
447+
* resubmitted to the GPU in the MCBP scenario. For that reason a race with
448+
* amdgpu_fence_process() modifying the same fence is not considered here.
*
* @seq is masked with num_fences_mask to select the fences[] slot; if that
* slot is empty, nothing is updated.
449+
*/
450+
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
451+
{
452+
struct amdgpu_fence_driver *drv = &ring->fence_drv;
453+
struct dma_fence *fence;
454+
/* Wrap the sequence number into the fences[] array. */
455+
seq &= drv->num_fences_mask;
456+
fence = drv->fences[seq];
/* No fence stored in this slot: nothing to update. */
457+
if (!fence)
458+
return;
459+
/* Overwrite the timestamp later read by amdgpu_fence_last_unsignaled_time_us(). */
460+
to_amdgpu_fence(fence)->start_timestamp = timestamp;
461+
}
462+
409463
/**
410464
* amdgpu_fence_driver_start_ring - make the fence driver
411465
* ready for use on the requested ring.

drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
211211
}
212212
}
213213

214+
amdgpu_ring_ib_begin(ring);
214215
if (job && ring->funcs->init_cond_exec)
215216
patch_offset = amdgpu_ring_init_cond_exec(ring);
216217

@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
285286
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
286287
ring->funcs->emit_wave_limit(ring, false);
287288

289+
amdgpu_ring_ib_end(ring);
288290
amdgpu_ring_commit(ring);
289291
return 0;
290292
}

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
569569

570570
return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
571571
}
572+
573+
/**
 * amdgpu_ring_ib_begin - mark the start of an IB submission on a ring
 * @ring: ring the IB is being scheduled on
 *
 * Forwards to amdgpu_sw_ring_ib_begin() when @ring is a software ring
 * (ring->is_sw_ring is set); does nothing for any other ring.
 * amdgpu_ib_schedule() calls this before the optional conditional-execution
 * setup.
 */
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
574+
{
575+
if (ring->is_sw_ring)
576+
amdgpu_sw_ring_ib_begin(ring);
577+
}
578+
579+
/**
 * amdgpu_ring_ib_end - mark the end of an IB submission on a ring
 * @ring: ring the IB is being scheduled on
 *
 * Forwards to amdgpu_sw_ring_ib_end() when @ring is a software ring
 * (ring->is_sw_ring is set); does nothing for any other ring.
 * amdgpu_ib_schedule() calls this right before amdgpu_ring_commit().
 */
void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
580+
{
581+
if (ring->is_sw_ring)
582+
amdgpu_sw_ring_ib_end(ring);
583+
}

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
145145
uint32_t wait_seq,
146146
signed long timeout);
147147
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
148+
148149
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
149150

151+
u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
152+
void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
153+
ktime_t timestamp);
154+
150155
/*
151156
* Rings.
152157
*/
@@ -313,6 +318,9 @@ struct amdgpu_ring {
313318
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
314319

315320
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
321+
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
322+
void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
323+
316324
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
317325
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
318326
void amdgpu_ring_commit(struct amdgpu_ring *ring);

0 commit comments

Comments
 (0)