Skip to content

Commit 00062ea

Browse files
committed
Merge tag 'drm-xe-fixes-2025-08-14' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
- Some more xe_migrate_access_memory fixes (Auld)
- Defer buffer object shrinker write-backs and GPU waits (Thomas)
- HWMON fix for clamping limits (Karthik)
- SRIOV-PF: Set VF LMEM BAR size (Michal)

Signed-off-by: Dave Airlie <[email protected]>
From: Rodrigo Vivi <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
2 parents 4699c04 + 94eae6e commit 00062ea

File tree

5 files changed

+126
-19
lines changed

5 files changed

+126
-19
lines changed

drivers/gpu/drm/xe/regs/xe_bars.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@
77

88
#define GTTMMADR_BAR 0 /* MMIO + GTT */
99
#define LMEM_BAR 2 /* VRAM */
10+
#define VF_LMEM_BAR 9 /* VF VRAM */
1011

1112
#endif

drivers/gpu/drm/xe/xe_hwmon.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
332332
int ret = 0;
333333
u32 reg_val, max;
334334
struct xe_reg rapl_limit;
335+
u64 max_supp_power_limit = 0;
335336

336337
mutex_lock(&hwmon->hwmon_lock);
337338

@@ -356,6 +357,20 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe
356357
goto unlock;
357358
}
358359

360+
/*
361+
* If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to
362+
* the supported maximum (U12.3 format).
363+
* This is to avoid truncation during reg_val calculation below and ensure the valid
364+
* power limit is sent for pcode which would clamp it to card-supported value.
365+
*/
366+
max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER;
367+
if (value > max_supp_power_limit) {
368+
value = max_supp_power_limit;
369+
drm_info(&hwmon->xe->drm,
370+
"Power limit clamped as selected %s exceeds channel %d limit\n",
371+
PWR_ATTR_TO_STR(attr), channel);
372+
}
373+
359374
/* Computation in 64-bits to avoid overflow. Round to nearest. */
360375
reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
361376

@@ -739,9 +754,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
739754
{
740755
int ret;
741756
u32 uval;
757+
u64 max_crit_power_curr = 0;
742758

743759
mutex_lock(&hwmon->hwmon_lock);
744760

761+
/*
762+
* If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1
763+
* max supported value, clamp it to the command's max (U10.6 format).
764+
* This is to avoid truncation during uval calculation below and ensure the valid power
765+
* limit is sent for pcode which would clamp it to card-supported value.
766+
*/
767+
max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor;
768+
if (value > max_crit_power_curr) {
769+
value = max_crit_power_curr;
770+
drm_info(&hwmon->xe->drm,
771+
"Power limit clamped as selected exceeds channel %d limit\n",
772+
channel);
773+
}
745774
uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor);
746775
ret = xe_hwmon_pcode_write_i1(hwmon, uval);
747776

drivers/gpu/drm/xe/xe_migrate.c

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1820,15 +1820,19 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
18201820
if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) ||
18211821
!IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) {
18221822
int buf_offset = 0;
1823+
void *bounce;
1824+
int err;
1825+
1826+
BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES));
1827+
bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL);
1828+
if (!bounce)
1829+
return -ENOMEM;
18231830

18241831
/*
18251832
* Less than ideal for large unaligned access but this should be
18261833
* fairly rare, can fixup if this becomes common.
18271834
*/
18281835
do {
1829-
u8 bounce[XE_CACHELINE_BYTES];
1830-
void *ptr = (void *)bounce;
1831-
int err;
18321836
int copy_bytes = min_t(int, bytes_left,
18331837
XE_CACHELINE_BYTES -
18341838
(offset & XE_CACHELINE_MASK));
@@ -1837,22 +1841,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
18371841
err = xe_migrate_access_memory(m, bo,
18381842
offset &
18391843
~XE_CACHELINE_MASK,
1840-
(void *)ptr,
1841-
sizeof(bounce), 0);
1844+
bounce,
1845+
XE_CACHELINE_BYTES, 0);
18421846
if (err)
1843-
return err;
1847+
break;
18441848

18451849
if (write) {
1846-
memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes);
1850+
memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes);
18471851

18481852
err = xe_migrate_access_memory(m, bo,
18491853
offset & ~XE_CACHELINE_MASK,
1850-
(void *)ptr,
1851-
sizeof(bounce), write);
1854+
bounce,
1855+
XE_CACHELINE_BYTES, write);
18521856
if (err)
1853-
return err;
1857+
break;
18541858
} else {
1855-
memcpy(buf + buf_offset, ptr + ptr_offset,
1859+
memcpy(buf + buf_offset, bounce + ptr_offset,
18561860
copy_bytes);
18571861
}
18581862

@@ -1861,7 +1865,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
18611865
offset += copy_bytes;
18621866
} while (bytes_left);
18631867

1864-
return 0;
1868+
kfree(bounce);
1869+
return err;
18651870
}
18661871

18671872
dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write);
@@ -1882,8 +1887,11 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
18821887
else
18831888
current_bytes = min_t(int, bytes_left, cursor.size);
18841889

1885-
if (fence)
1886-
dma_fence_put(fence);
1890+
if (current_bytes & ~PAGE_MASK) {
1891+
int pitch = 4;
1892+
1893+
current_bytes = min_t(int, current_bytes, S16_MAX * pitch);
1894+
}
18871895

18881896
__fence = xe_migrate_vram(m, current_bytes,
18891897
(unsigned long)buf & ~PAGE_MASK,
@@ -1892,11 +1900,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
18921900
XE_MIGRATE_COPY_TO_VRAM :
18931901
XE_MIGRATE_COPY_TO_SRAM);
18941902
if (IS_ERR(__fence)) {
1895-
if (fence)
1903+
if (fence) {
18961904
dma_fence_wait(fence, false);
1905+
dma_fence_put(fence);
1906+
}
18971907
fence = __fence;
18981908
goto out_err;
18991909
}
1910+
1911+
dma_fence_put(fence);
19001912
fence = __fence;
19011913

19021914
buf += current_bytes;

drivers/gpu/drm/xe/xe_pci_sriov.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
* Copyright © 2023-2024 Intel Corporation
44
*/
55

6+
#include <linux/bitops.h>
7+
#include <linux/pci.h>
8+
9+
#include "regs/xe_bars.h"
610
#include "xe_assert.h"
711
#include "xe_device.h"
812
#include "xe_gt_sriov_pf_config.h"
@@ -128,6 +132,18 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs,
128132
}
129133
}
130134

135+
static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs)
136+
{
137+
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
138+
u32 sizes;
139+
140+
sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs);
141+
if (!sizes)
142+
return 0;
143+
144+
return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes));
145+
}
146+
131147
static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
132148
{
133149
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -158,6 +174,12 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
158174
if (err < 0)
159175
goto failed;
160176

177+
if (IS_DGFX(xe)) {
178+
err = resize_vf_vram_bar(xe, num_vfs);
179+
if (err)
180+
xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err);
181+
}
182+
161183
err = pci_enable_sriov(pdev, num_vfs);
162184
if (err < 0)
163185
goto failed;

drivers/gpu/drm/xe/xe_shrinker.c

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea
5454
write_unlock(&shrinker->lock);
5555
}
5656

57-
static s64 xe_shrinker_walk(struct xe_device *xe,
58-
struct ttm_operation_ctx *ctx,
59-
const struct xe_bo_shrink_flags flags,
60-
unsigned long to_scan, unsigned long *scanned)
57+
static s64 __xe_shrinker_walk(struct xe_device *xe,
58+
struct ttm_operation_ctx *ctx,
59+
const struct xe_bo_shrink_flags flags,
60+
unsigned long to_scan, unsigned long *scanned)
6161
{
6262
unsigned int mem_type;
6363
s64 freed = 0, lret;
@@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
9393
return freed;
9494
}
9595

96+
/*
97+
* Try shrinking idle objects without writeback first, then if not sufficient,
98+
* try also non-idle objects and finally if that's not sufficient either,
99+
* add writeback. This avoids stalls and explicit writebacks with light or
100+
* moderate memory pressure.
101+
*/
102+
static s64 xe_shrinker_walk(struct xe_device *xe,
103+
struct ttm_operation_ctx *ctx,
104+
const struct xe_bo_shrink_flags flags,
105+
unsigned long to_scan, unsigned long *scanned)
106+
{
107+
bool no_wait_gpu = true;
108+
struct xe_bo_shrink_flags save_flags = flags;
109+
s64 lret, freed;
110+
111+
swap(no_wait_gpu, ctx->no_wait_gpu);
112+
save_flags.writeback = false;
113+
lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
114+
swap(no_wait_gpu, ctx->no_wait_gpu);
115+
if (lret < 0 || *scanned >= to_scan)
116+
return lret;
117+
118+
freed = lret;
119+
if (!ctx->no_wait_gpu) {
120+
lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
121+
if (lret < 0)
122+
return lret;
123+
freed += lret;
124+
if (*scanned >= to_scan)
125+
return freed;
126+
}
127+
128+
if (flags.writeback) {
129+
lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned);
130+
if (lret < 0)
131+
return lret;
132+
freed += lret;
133+
}
134+
135+
return freed;
136+
}
137+
96138
static unsigned long
97139
xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
98140
{
@@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con
199241
runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup);
200242

201243
shrink_flags.purge = false;
244+
202245
lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags,
203246
nr_to_scan, &nr_scanned);
204247
if (lret >= 0)

0 commit comments

Comments
 (0)