Skip to content

Commit 9630b58

Browse files
committed
drm/msm/gem: Prevent blocking within shrinker loop
Consider this scenario: 1. APP1 continuously creates lots of small GEMs 2. APP2 triggers `drop_caches` 3. Shrinker starts to evict APP1 GEMs, while APP1 produces new purgeable GEMs 4. msm_gem_shrinker_scan() returns non-zero number of freed pages and causes shrinker to try shrink more 5. msm_gem_shrinker_scan() returns non-zero number of freed pages again, goto 4 6. The APP2 is blocked in `drop_caches` until APP1 stops producing purgeable GEMs To prevent this blocking scenario, check number of remaining pages that GPU shrinker couldn't release due to a GEM locking contention or shrinking rejection. If there are no remaining pages left to shrink, then there is no need to free up more pages and shrinker may break out from the loop. This problem was found during shrinker/madvise IOCTL testing of virtio-gpu driver. The MSM driver is affected in the same way. Reviewed-by: Rob Clark <[email protected]> Reviewed-by: Thomas Zimmermann <[email protected]> Fixes: b352ba5 ("drm/msm/gem: Convert to using drm_gem_lru") Signed-off-by: Dmitry Osipenko <[email protected]> Link: https://lore.kernel.org/all/[email protected]/
1 parent ee9adb7 commit 9630b58

File tree

3 files changed

+19
-5
lines changed

3 files changed

+19
-5
lines changed

drivers/gpu/drm/drm_gem.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,10 +1375,13 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
13751375
*
13761376
* @lru: The LRU to scan
13771377
* @nr_to_scan: The number of pages to try to reclaim
1378+
* @remaining: The number of pages left to reclaim, should be initialized by caller
13781379
* @shrink: Callback to try to shrink/reclaim the object.
13791380
*/
13801381
unsigned long
1381-
drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
1382+
drm_gem_lru_scan(struct drm_gem_lru *lru,
1383+
unsigned int nr_to_scan,
1384+
unsigned long *remaining,
13821385
bool (*shrink)(struct drm_gem_object *obj))
13831386
{
13841387
struct drm_gem_lru still_in_lru;
@@ -1417,8 +1420,10 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
14171420
* hit shrinker in response to trying to get backing pages
14181421
* for this obj (ie. while it's lock is already held)
14191422
*/
1420-
if (!dma_resv_trylock(obj->resv))
1423+
if (!dma_resv_trylock(obj->resv)) {
1424+
*remaining += obj->size >> PAGE_SHIFT;
14211425
goto tail;
1426+
}
14221427

14231428
if (shrink(obj)) {
14241429
freed += obj->size >> PAGE_SHIFT;

drivers/gpu/drm/msm/msm_gem_shrinker.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
107107
bool (*shrink)(struct drm_gem_object *obj);
108108
bool cond;
109109
unsigned long freed;
110+
unsigned long remaining;
110111
} stages[] = {
111112
/* Stages of progressively more aggressive/expensive reclaim: */
112113
{ &priv->lru.dontneed, purge, true },
@@ -116,14 +117,18 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
116117
};
117118
long nr = sc->nr_to_scan;
118119
unsigned long freed = 0;
120+
unsigned long remaining = 0;
119121

120122
for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) {
121123
if (!stages[i].cond)
122124
continue;
123125
stages[i].freed =
124-
drm_gem_lru_scan(stages[i].lru, nr, stages[i].shrink);
126+
drm_gem_lru_scan(stages[i].lru, nr,
127+
&stages[i].remaining,
128+
stages[i].shrink);
125129
nr -= stages[i].freed;
126130
freed += stages[i].freed;
131+
remaining += stages[i].remaining;
127132
}
128133

129134
if (freed) {
@@ -132,7 +137,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
132137
stages[3].freed);
133138
}
134139

135-
return (freed > 0) ? freed : SHRINK_STOP;
140+
return (freed > 0 && remaining > 0) ? freed : SHRINK_STOP;
136141
}
137142

138143
#ifdef CONFIG_DEBUG_FS
@@ -182,10 +187,12 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
182187
NULL,
183188
};
184189
unsigned idx, unmapped = 0;
190+
unsigned long remaining = 0;
185191

186192
for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) {
187193
unmapped += drm_gem_lru_scan(lrus[idx],
188194
vmap_shrink_limit - unmapped,
195+
&remaining,
189196
vmap_shrink);
190197
}
191198

include/drm/drm_gem.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,9 @@ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
475475
void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock);
476476
void drm_gem_lru_remove(struct drm_gem_object *obj);
477477
void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj);
478-
unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
478+
unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru,
479+
unsigned int nr_to_scan,
480+
unsigned long *remaining,
479481
bool (*shrink)(struct drm_gem_object *obj));
480482

481483
#endif /* __DRM_GEM_H__ */

0 commit comments

Comments
 (0)