
Commit dbe97cf

DemiMarie authored and jgross1 committed
xen/gntdev: Avoid blocking in unmap_grant_pages()
unmap_grant_pages() currently waits for the pages to no longer be used. In QubesOS/qubes-issues#7481, this led to a deadlock against i915: i915 was waiting for gntdev's MMU notifier to finish, while gntdev was waiting for i915 to free its pages. I also believe this is responsible for various deadlocks I have experienced in the past.

Avoid these problems by making unmap_grant_pages() async. This requires making it return void, as any errors will not be available when the function returns. Fortunately, the only use of the return value is a WARN_ON(), which can be replaced by a WARN_ON() where the error is detected. Additionally, a failed call will not prevent further calls from being made, but this is harmless.

Because unmap_grant_pages() is now async, the grant handle will be set to INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same handle. Instead, a separate bool array is allocated for this purpose. This wastes memory, but stuffing this information in padding bytes is too fragile. Furthermore, it is necessary to grab a reference to the map before making the asynchronous call, and release the reference when the call returns. It is also necessary to guard against reentrancy in gntdev_put_map(), and to handle the case where userspace tries to map a mapping whose contents have not all been freed yet.

Fixes: 7452822 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: [email protected]
Signed-off-by: Demi Marie Obenour <[email protected]>
Reviewed-by: Juergen Gross <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Juergen Gross <[email protected]>
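
The core of the change is replacing the synchronous gnttab_unmap_refs_sync() call with gnttab_unmap_refs_async() plus a completion callback that drops a reference on the map. The snippet below is a minimal illustrative sketch of that pattern, condensed from the diff that follows; the function names unmap_done() and queue_async_unmap() are shortened stand-ins, and the notify/PV (kunmap) handling is omitted. It is not the exact kernel code.

static void unmap_done(int result, struct gntab_unmap_queue_data *data)
{
        struct gntdev_grant_map *map = data->data;

        /* Make the grants reusable only once the unmap has completed. */
        atomic_sub(data->count, &map->live_grants);

        /* Drop the reference taken before the async call was queued. */
        gntdev_put_map(NULL, map);
}

static void queue_async_unmap(struct gntdev_grant_map *map, int offset, int pages)
{
        map->unmap_data.unmap_ops = map->unmap_ops + offset;
        map->unmap_data.pages     = map->pages + offset;
        map->unmap_data.count     = pages;
        map->unmap_data.done      = unmap_done;
        map->unmap_data.data      = map;

        /* Keep the map alive until unmap_done() runs. */
        refcount_inc(&map->users);
        gnttab_unmap_refs_async(&map->unmap_data);
}

Because the unmap ops and the queue data are embedded in the map itself, no allocation is needed on the unmap path, which can run from an MMU notifier.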
1 parent ca69690 commit dbe97cf

File tree

2 files changed, 113 insertions(+), 51 deletions(-)

drivers/xen/gntdev-common.h

Lines changed: 7 additions & 0 deletions
@@ -16,6 +16,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/types.h>
 #include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
 
 struct gntdev_dmabuf_priv;
 
@@ -56,6 +57,7 @@ struct gntdev_grant_map {
         struct gnttab_unmap_grant_ref *unmap_ops;
         struct gnttab_map_grant_ref *kmap_ops;
         struct gnttab_unmap_grant_ref *kunmap_ops;
+        bool *being_removed;
         struct page **pages;
         unsigned long pages_vm_start;
 
@@ -73,6 +75,11 @@ struct gntdev_grant_map {
         /* Needed to avoid allocation in gnttab_dma_free_pages(). */
         xen_pfn_t *frames;
 #endif
+
+        /* Number of live grants */
+        atomic_t live_grants;
+        /* Needed to avoid allocation in __unmap_grant_pages */
+        struct gntab_unmap_queue_data unmap_data;
 };
 
 struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,

drivers/xen/gntdev.c

Lines changed: 106 additions & 51 deletions
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/refcount.h>
+#include <linux/workqueue.h>
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -60,10 +61,11 @@ module_param(limit, uint, 0644);
 MODULE_PARM_DESC(limit,
         "Maximum number of grants that may be mapped by one mapping request");
 
+/* True in PV mode, false otherwise */
 static int use_ptemod;
 
-static int unmap_grant_pages(struct gntdev_grant_map *map,
-                             int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+                              int offset, int pages);
 
 static struct miscdevice gntdev_miscdev;
 
@@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
         kvfree(map->unmap_ops);
         kvfree(map->kmap_ops);
         kvfree(map->kunmap_ops);
+        kvfree(map->being_removed);
         kfree(map);
 }
 
@@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
         add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
                                         GFP_KERNEL);
         add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+        add->being_removed =
+                kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
         if (NULL == add->grants    ||
             NULL == add->map_ops   ||
             NULL == add->unmap_ops ||
-            NULL == add->pages)
+            NULL == add->pages     ||
+            NULL == add->being_removed)
                 goto err;
         if (use_ptemod) {
                 add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
@@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
         if (!refcount_dec_and_test(&map->users))
                 return;
 
-        if (map->pages && !use_ptemod)
+        if (map->pages && !use_ptemod) {
+                /*
+                 * Increment the reference count.  This ensures that the
+                 * subsequent call to unmap_grant_pages() will not wind up
+                 * re-entering itself.  It *can* wind up calling
+                 * gntdev_put_map() recursively, but such calls will be with a
+                 * reference count greater than 1, so they will return before
+                 * this code is reached.  The recursion depth is thus limited
+                 * to 1.  Do NOT use refcount_inc() here, as it will detect
+                 * that the reference count is zero and WARN().
+                 */
+                refcount_set(&map->users, 1);
+
+                /*
+                 * Unmap the grants.  This may or may not be asynchronous, so
+                 * it is possible that the reference count is 1 on return, but
+                 * it could also be greater than 1.
+                 */
                 unmap_grant_pages(map, 0, map->count);
 
+                /* Check if the memory now needs to be freed */
+                if (!refcount_dec_and_test(&map->users))
+                        return;
+
+                /*
+                 * All pages have been returned to the hypervisor, so free the
+                 * map.
+                 */
+        }
+
         if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
                 notify_remote_via_evtchn(map->notify.event);
                 evtchn_put(map->notify.event);
@@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 
 int gntdev_map_grant_pages(struct gntdev_grant_map *map)
 {
+        size_t alloced = 0;
         int i, err = 0;
 
         if (!use_ptemod) {
@@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
                 map->count);
 
         for (i = 0; i < map->count; i++) {
-                if (map->map_ops[i].status == GNTST_okay)
+                if (map->map_ops[i].status == GNTST_okay) {
                         map->unmap_ops[i].handle = map->map_ops[i].handle;
-                else if (!err)
+                        if (!use_ptemod)
+                                alloced++;
+                } else if (!err)
                         err = -EINVAL;
 
                 if (map->flags & GNTMAP_device_map)
                         map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
 
                 if (use_ptemod) {
-                        if (map->kmap_ops[i].status == GNTST_okay)
+                        if (map->kmap_ops[i].status == GNTST_okay) {
+                                if (map->map_ops[i].status == GNTST_okay)
+                                        alloced++;
                                 map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
-                        else if (!err)
+                        } else if (!err)
                                 err = -EINVAL;
                 }
         }
+        atomic_add(alloced, &map->live_grants);
         return err;
 }
 
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
-                               int pages)
+static void __unmap_grant_pages_done(int result,
+                struct gntab_unmap_queue_data *data)
 {
-        int i, err = 0;
-        struct gntab_unmap_queue_data unmap_data;
-
-        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
-                int pgno = (map->notify.addr >> PAGE_SHIFT);
-                if (pgno >= offset && pgno < offset + pages) {
-                        /* No need for kmap, pages are in lowmem */
-                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
-                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
-                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
-                }
-        }
-
-        unmap_data.unmap_ops = map->unmap_ops + offset;
-        unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
-        unmap_data.pages = map->pages + offset;
-        unmap_data.count = pages;
-
-        err = gnttab_unmap_refs_sync(&unmap_data);
-        if (err)
-                return err;
+        unsigned int i;
+        struct gntdev_grant_map *map = data->data;
+        unsigned int offset = data->unmap_ops - map->unmap_ops;
 
-        for (i = 0; i < pages; i++) {
-                if (map->unmap_ops[offset+i].status)
-                        err = -EINVAL;
+        for (i = 0; i < data->count; i++) {
+                WARN_ON(map->unmap_ops[offset+i].status);
                 pr_debug("unmap handle=%d st=%d\n",
                         map->unmap_ops[offset+i].handle,
                         map->unmap_ops[offset+i].status);
                 map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
                 if (use_ptemod) {
-                        if (map->kunmap_ops[offset+i].status)
-                                err = -EINVAL;
+                        WARN_ON(map->kunmap_ops[offset+i].status);
                         pr_debug("kunmap handle=%u st=%d\n",
                                 map->kunmap_ops[offset+i].handle,
                                 map->kunmap_ops[offset+i].status);
                         map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
                 }
         }
-        return err;
+        /*
+         * Decrease the live-grant counter.  This must happen after the loop to
+         * prevent premature reuse of the grants by gnttab_mmap().
+         */
+        atomic_sub(data->count, &map->live_grants);
+
+        /* Release reference taken by __unmap_grant_pages */
+        gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+                               int pages)
+{
+        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+                int pgno = (map->notify.addr >> PAGE_SHIFT);
+
+                if (pgno >= offset && pgno < offset + pages) {
+                        /* No need for kmap, pages are in lowmem */
+                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
+                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+                }
+        }
+
+        map->unmap_data.unmap_ops = map->unmap_ops + offset;
+        map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+        map->unmap_data.pages = map->pages + offset;
+        map->unmap_data.count = pages;
+        map->unmap_data.done = __unmap_grant_pages_done;
+        map->unmap_data.data = map;
+        refcount_inc(&map->users); /* to keep map alive during async call below */
+
+        gnttab_unmap_refs_async(&map->unmap_data);
 }
 
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
-                             int pages)
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+                              int pages)
 {
-        int range, err = 0;
+        int range;
+
+        if (atomic_read(&map->live_grants) == 0)
+                return; /* Nothing to do */
 
         pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 
         /* It is possible the requested range will have a "hole" where we
          * already unmapped some of the grants. Only unmap valid ranges.
          */
-        while (pages && !err) {
-                while (pages &&
-                       map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
+        while (pages) {
+                while (pages && map->being_removed[offset]) {
                         offset++;
                         pages--;
                 }
                 range = 0;
                 while (range < pages) {
-                        if (map->unmap_ops[offset + range].handle ==
-                            INVALID_GRANT_HANDLE)
+                        if (map->being_removed[offset + range])
                                 break;
+                        map->being_removed[offset + range] = true;
                         range++;
                 }
-                err = __unmap_grant_pages(map, offset, range);
+                if (range)
+                        __unmap_grant_pages(map, offset, range);
                 offset += range;
                 pages -= range;
         }
-
-        return err;
 }
 
 /* ------------------------------------------------------------------ */
@@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
         struct gntdev_grant_map *map =
                 container_of(mn, struct gntdev_grant_map, notifier);
         unsigned long mstart, mend;
-        int err;
 
         if (!mmu_notifier_range_blockable(range))
                 return false;
@@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
                         map->index, map->count,
                         map->vma->vm_start, map->vma->vm_end,
                         range->start, range->end, mstart, mend);
-        err = unmap_grant_pages(map,
+        unmap_grant_pages(map,
                                 (mstart - map->vma->vm_start) >> PAGE_SHIFT,
                                 (mend - mstart) >> PAGE_SHIFT);
-        WARN_ON(err);
 
         return true;
 }
@@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
                 goto unlock_out;
         if (use_ptemod && map->vma)
                 goto unlock_out;
+        if (atomic_read(&map->live_grants)) {
+                err = -EAGAIN;
+                goto unlock_out;
+        }
         refcount_inc(&map->users);
 
         vma->vm_ops = &gntdev_vmops;
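
One user-visible consequence of the -EAGAIN check added to gntdev_mmap() above is that mapping a range whose previous unmap is still in flight now fails instead of blocking. A hypothetical userspace sketch, assuming the map offset was obtained from an earlier IOCTL_GNTDEV_MAP_GRANT_REF call, would retry briefly:

/* Hypothetical example: retry mmap() of /dev/xen/gntdev while it returns EAGAIN. */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

static void *map_granted_pages(int gntdev_fd, uint64_t index, size_t len)
{
        for (int tries = 0; tries < 100; tries++) {
                void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
                                  MAP_SHARED, gntdev_fd, (off_t)index);
                if (addr != MAP_FAILED)
                        return addr;
                if (errno != EAGAIN)
                        break;          /* a real error; give up */
                usleep(1000);           /* previous unmap still completing */
        }
        return NULL;
}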
