Skip to content

Commit 9cc3193

Browse files
committed
i915/perf: Drop the aging_tail logic in perf OA
On DG2, capturing OA reports while running heavy render workloads sometimes results in invalid OA reports where 64-byte chunks inside reports have stale values. Under memory pressure, high OA sampling rates (13.3 us) and heavy render workload, occasionally, the OA HW TAIL pointer does not progress as fast as the sampling rate. When these glitches occur, the TAIL pointer takes approx. 200us to progress. While this is expected behavior from the HW perspective, invalid reports are not expected. In oa_buffer_check_unlocked(), when we execute the if condition, we are updating the oa_buffer.tail to the aging tail and then setting pollin based on this tail value, however, we do not have a chance to rewind and validate the reports prior to setting pollin. The validation happens in a subsequent call to oa_buffer_check_unlocked(). If a read occurs before this validation, then we end up reading reports up until this oa_buffer.tail value which includes invalid reports. Though found on DG2, this affects all platforms. The aging tail logic is no longer necessary since we are explicitly checking for landed reports. Start by dropping the aging tail logic. v2: - Drop extra blank line - Add reason to drop aging logic (Ashutosh) - Add bug links (Ashutosh) - rename aged_tail to read_tail - Squash patches 3 and 1 v3: (Ashutosh) - Remove extra spaces - Remove gtt_offset from the pollin calculation - s/Bug:/Link/ in commit message (checkpatch) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7484 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7757 Signed-off-by: Umesh Nerlige Ramappa <[email protected]> Reviewed-by: Ashutosh Dixit <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 81b1b59 commit 9cc3193

File tree

2 files changed

+38
-69
lines changed

2 files changed

+38
-69
lines changed

drivers/gpu/drm/i915/i915_perf.c

Lines changed: 38 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -531,8 +531,7 @@ static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
531531
* (See description of OA_TAIL_MARGIN_NSEC above for further details.)
532532
*
533533
* Besides returning true when there is data available to read() this function
534-
* also updates the tail, aging_tail and aging_timestamp in the oa_buffer
535-
* object.
534+
* also updates the tail in the oa_buffer object.
536535
*
537536
* Note: It's safe to read OA config state here unlocked, assuming that this is
538537
* only called while the stream is enabled, while the global OA configuration
@@ -544,10 +543,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
544543
{
545544
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
546545
int report_size = stream->oa_buffer.format->size;
546+
u32 head, tail, read_tail;
547547
unsigned long flags;
548548
bool pollin;
549549
u32 hw_tail;
550-
u64 now;
551550
u32 partial_report_size;
552551

553552
/* We have to consider the (unlikely) possibility that read() errors
@@ -568,62 +567,47 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
568567
/* Subtract partial amount off the tail */
569568
hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
570569

571-
now = ktime_get_mono_fast_ns();
572-
573-
if (hw_tail == stream->oa_buffer.aging_tail &&
574-
(now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
575-
/* If the HW tail hasn't move since the last check and the HW
576-
* tail has been aging for long enough, declare it the new
577-
* tail.
578-
*/
579-
stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
580-
} else {
581-
u32 head, tail, aged_tail;
582-
583-
/* NB: The head we observe here might effectively be a little
584-
* out of date. If a read() is in progress, the head could be
585-
* anywhere between this head and stream->oa_buffer.tail.
586-
*/
587-
head = stream->oa_buffer.head - gtt_offset;
588-
aged_tail = stream->oa_buffer.tail - gtt_offset;
589-
590-
hw_tail -= gtt_offset;
591-
tail = hw_tail;
592-
593-
/* Walk the stream backward until we find a report with report
594-
* id and timestmap not at 0. Since the circular buffer pointers
595-
* progress by increments of 64 bytes and that reports can be up
596-
* to 256 bytes long, we can't tell whether a report has fully
597-
* landed in memory before the report id and timestamp of the
598-
* following report have effectively landed.
599-
*
600-
* This is assuming that the writes of the OA unit land in
601-
* memory in the order they were written to.
602-
* If not : (╯°□°)╯︵ ┻━┻
603-
*/
604-
while (OA_TAKEN(tail, aged_tail) >= report_size) {
605-
void *report = stream->oa_buffer.vaddr + tail;
570+
/* NB: The head we observe here might effectively be a little
571+
* out of date. If a read() is in progress, the head could be
572+
* anywhere between this head and stream->oa_buffer.tail.
573+
*/
574+
head = stream->oa_buffer.head - gtt_offset;
575+
read_tail = stream->oa_buffer.tail - gtt_offset;
576+
577+
hw_tail -= gtt_offset;
578+
tail = hw_tail;
579+
580+
/* Walk the stream backward until we find a report with report
581+
* id and timestmap not at 0. Since the circular buffer pointers
582+
* progress by increments of 64 bytes and that reports can be up
583+
* to 256 bytes long, we can't tell whether a report has fully
584+
* landed in memory before the report id and timestamp of the
585+
* following report have effectively landed.
586+
*
587+
* This is assuming that the writes of the OA unit land in
588+
* memory in the order they were written to.
589+
* If not : (╯°□°)╯︵ ┻━┻
590+
*/
591+
while (OA_TAKEN(tail, read_tail) >= report_size) {
592+
void *report = stream->oa_buffer.vaddr + tail;
606593

607-
if (oa_report_id(stream, report) ||
608-
oa_timestamp(stream, report))
609-
break;
594+
if (oa_report_id(stream, report) ||
595+
oa_timestamp(stream, report))
596+
break;
610597

611-
tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
612-
}
598+
tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
599+
}
613600

614-
if (OA_TAKEN(hw_tail, tail) > report_size &&
615-
__ratelimit(&stream->perf->tail_pointer_race))
616-
drm_notice(&stream->uncore->i915->drm,
617-
"unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
618-
head, tail, hw_tail);
601+
if (OA_TAKEN(hw_tail, tail) > report_size &&
602+
__ratelimit(&stream->perf->tail_pointer_race))
603+
drm_notice(&stream->uncore->i915->drm,
604+
"unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
605+
head, tail, hw_tail);
619606

620-
stream->oa_buffer.tail = gtt_offset + tail;
621-
stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
622-
stream->oa_buffer.aging_timestamp = now;
623-
}
607+
stream->oa_buffer.tail = gtt_offset + tail;
624608

625-
pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
626-
stream->oa_buffer.head - gtt_offset) >= report_size;
609+
pollin = OA_TAKEN(stream->oa_buffer.tail,
610+
stream->oa_buffer.head) >= report_size;
627611

628612
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
629613

@@ -1727,7 +1711,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
17271711
gtt_offset | OABUFFER_SIZE_16M);
17281712

17291713
/* Mark that we need updated tail pointers to read from... */
1730-
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
17311714
stream->oa_buffer.tail = gtt_offset;
17321715

17331716
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -1779,7 +1762,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
17791762
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
17801763

17811764
/* Mark that we need updated tail pointers to read from... */
1782-
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
17831765
stream->oa_buffer.tail = gtt_offset;
17841766

17851767
/*
@@ -1833,7 +1815,6 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
18331815
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
18341816

18351817
/* Mark that we need updated tail pointers to read from... */
1836-
stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
18371818
stream->oa_buffer.tail = gtt_offset;
18381819

18391820
/*

drivers/gpu/drm/i915/i915_perf_types.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -312,18 +312,6 @@ struct i915_perf_stream {
312312
*/
313313
spinlock_t ptr_lock;
314314

315-
/**
316-
* @aging_tail: The last HW tail reported by HW. The data
317-
* might not have made it to memory yet though.
318-
*/
319-
u32 aging_tail;
320-
321-
/**
322-
* @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
323-
* was read; used to determine when it is old enough to trust.
324-
*/
325-
u64 aging_timestamp;
326-
327315
/**
328316
* @head: Although we can always read back the head pointer register,
329317
* we prefer to avoid trusting the HW state, just to avoid any

0 commit comments

Comments
 (0)