Skip to content

Commit 84c0b4a

Browse files
vsbelgaum authored and lucasdemarchi committed
drm/xe/bmg: Update Wa_22019338487
Limit GT max frequency to 2600MHz and wait for frequency to reduce before proceeding with a transient flush. This is really only needed for the transient flush: if L2 flush is needed due to 16023588340 then there's no need to do this additional wait since we are already using the bigger hammer.

v2: Use generic names, ensure user set max frequency requests wait for flush to complete (Rodrigo)
v3:
- User requests wait via wait_var_event_timeout (Lucas)
- Close races on flush + user requests (Lucas)
- Fix xe_guc_pc_remove_flush_freq_limit() being called on last gt rather than root gt (Lucas)
v4:
- Only apply the freq reducing part if a TDF is needed: L2 flush trumps the need for waiting for a lower frequency

Fixes: aaa0807 ("drm/xe/bmg: Apply Wa_22019338487")
Reviewed-by: Rodrigo Vivi <[email protected]>
Signed-off-by: Vinay Belgaumkar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Lucas De Marchi <[email protected]>
(cherry picked from commit deea6a7)
Signed-off-by: Lucas De Marchi <[email protected]>
1 parent a5c7dcd commit 84c0b4a

File tree

4 files changed

+135
-2
lines changed

4 files changed

+135
-2
lines changed

drivers/gpu/drm/xe/xe_device.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "xe_gt_printk.h"
4141
#include "xe_gt_sriov_vf.h"
4242
#include "xe_guc.h"
43+
#include "xe_guc_pc.h"
4344
#include "xe_hw_engine_group.h"
4445
#include "xe_hwmon.h"
4546
#include "xe_irq.h"
@@ -1071,11 +1072,14 @@ void xe_device_td_flush(struct xe_device *xe)
10711072
return;
10721073

10731074
root_gt = xe_root_mmio_gt(xe);
1074-
if (XE_WA(root_gt, 16023588340))
1075+
if (XE_WA(root_gt, 16023588340)) {
10751076
/* A transient flush is not sufficient: flush the L2 */
10761077
xe_device_l2_flush(xe);
1077-
else
1078+
} else {
1079+
xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
10781080
tdf_request_sync(xe);
1081+
xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
1082+
}
10791083
}
10801084

10811085
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)

drivers/gpu/drm/xe/xe_guc_pc.c

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
#include <linux/cleanup.h>
99
#include <linux/delay.h>
10+
#include <linux/jiffies.h>
1011
#include <linux/ktime.h>
12+
#include <linux/wait_bit.h>
1113

1214
#include <drm/drm_managed.h>
1315
#include <drm/drm_print.h>
@@ -53,9 +55,11 @@
5355
#define LNL_MERT_FREQ_CAP 800
5456
#define BMG_MERT_FREQ_CAP 2133
5557
#define BMG_MIN_FREQ 1200
58+
#define BMG_MERT_FLUSH_FREQ_CAP 2600
5659

5760
#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
5861
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
62+
#define SLPC_ACT_FREQ_TIMEOUT_MS 100
5963

6064
/**
6165
* DOC: GuC Power Conservation (PC)
@@ -143,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
143147
return -ETIMEDOUT;
144148
}
145149

150+
static int wait_for_flush_complete(struct xe_guc_pc *pc)
151+
{
152+
const unsigned long timeout = msecs_to_jiffies(30);
153+
154+
if (!wait_var_event_timeout(&pc->flush_freq_limit,
155+
!atomic_read(&pc->flush_freq_limit),
156+
timeout))
157+
return -ETIMEDOUT;
158+
159+
return 0;
160+
}
161+
162+
static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
163+
{
164+
int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
165+
int slept, wait = 10;
166+
167+
for (slept = 0; slept < timeout_us;) {
168+
if (xe_guc_pc_get_act_freq(pc) <= freq)
169+
return 0;
170+
171+
usleep_range(wait, wait << 1);
172+
slept += wait;
173+
wait <<= 1;
174+
if (slept + wait > timeout_us)
175+
wait = timeout_us - slept;
176+
}
177+
178+
return -ETIMEDOUT;
179+
}
146180
static int pc_action_reset(struct xe_guc_pc *pc)
147181
{
148182
struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -688,6 +722,11 @@ static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
688722
*/
689723
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
690724
{
725+
if (XE_WA(pc_to_gt(pc), 22019338487)) {
726+
if (wait_for_flush_complete(pc) != 0)
727+
return -EAGAIN;
728+
}
729+
691730
guard(mutex)(&pc->freq_lock);
692731

693732
return xe_guc_pc_set_max_freq_locked(pc, freq);
@@ -888,6 +927,92 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
888927
return ret;
889928
}
890929

930+
static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
931+
{
932+
struct xe_gt *gt = pc_to_gt(pc);
933+
934+
return XE_WA(gt, 22019338487) &&
935+
pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
936+
}
937+
938+
/**
939+
* xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
940+
* @pc: the xe_guc_pc object
941+
*
942+
* As per the WA, reduce max GT frequency during L2 cache flush
943+
*/
944+
void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
945+
{
946+
struct xe_gt *gt = pc_to_gt(pc);
947+
u32 max_freq;
948+
int ret;
949+
950+
if (!needs_flush_freq_limit(pc))
951+
return;
952+
953+
guard(mutex)(&pc->freq_lock);
954+
955+
ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
956+
if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
957+
ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
958+
if (ret) {
959+
xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
960+
BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
961+
return;
962+
}
963+
964+
atomic_set(&pc->flush_freq_limit, 1);
965+
966+
/*
967+
* If user has previously changed max freq, stash that value to
968+
* restore later, otherwise use the current max. New user
969+
* requests wait on flush.
970+
*/
971+
if (pc->user_requested_max != 0)
972+
pc->stashed_max_freq = pc->user_requested_max;
973+
else
974+
pc->stashed_max_freq = max_freq;
975+
}
976+
977+
/*
978+
* Wait for actual freq to go below the flush cap: even if the previous
979+
* max was below cap, the current one might still be above it
980+
*/
981+
ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
982+
if (ret)
983+
xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
984+
BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
985+
}
986+
987+
/**
988+
* xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
989+
* @pc: the xe_guc_pc object
990+
*
991+
* Retrieve the previous GT max frequency value.
992+
*/
993+
void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
994+
{
995+
struct xe_gt *gt = pc_to_gt(pc);
996+
int ret = 0;
997+
998+
if (!needs_flush_freq_limit(pc))
999+
return;
1000+
1001+
if (!atomic_read(&pc->flush_freq_limit))
1002+
return;
1003+
1004+
mutex_lock(&pc->freq_lock);
1005+
1006+
ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
1007+
if (ret)
1008+
xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
1009+
pc->stashed_max_freq, ret);
1010+
1011+
atomic_set(&pc->flush_freq_limit, 0);
1012+
mutex_unlock(&pc->freq_lock);
1013+
wake_up_var(&pc->flush_freq_limit);
1014+
}
1015+
8911016
static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
8921017
{
8931018
int ret;

drivers/gpu/drm/xe/xe_guc_pc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
3838
void xe_guc_pc_init_early(struct xe_guc_pc *pc);
3939
int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
4040
void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
41+
void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
42+
void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
4143

4244
#endif /* _XE_GUC_PC_H_ */

drivers/gpu/drm/xe/xe_guc_pc_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
struct xe_guc_pc {
1616
/** @bo: GGTT buffer object that is shared with GuC PC */
1717
struct xe_bo *bo;
18+
/** @flush_freq_limit: 1 when max freq changes are limited by driver */
19+
atomic_t flush_freq_limit;
1820
/** @rp0_freq: HW RP0 frequency - The Maximum one */
1921
u32 rp0_freq;
2022
/** @rpa_freq: HW RPa frequency - The Achievable one */

0 commit comments

Comments
 (0)