Skip to content

Commit 5c13a4a

Browse files
m-v-b
authored and jgross1 committed
xen/gntdev: Accommodate VMA splitting
Prior to this commit, the gntdev driver code did not handle the following scenario correctly with paravirtualized (PV) Xen domains: * User process sets up a gntdev mapping composed of two grant mappings (i.e., two pages shared by another Xen domain). * User process munmap()s one of the pages. * User process munmap()s the remaining page. * User process exits. In the scenario above, the user process would cause the kernel to log the following messages in dmesg for the first munmap(), and the second munmap() call would result in similar log messages: BUG: Bad page map in process doublemap.test pte:... pmd:... page:0000000057c97bff refcount:1 mapcount:-1 \ mapping:0000000000000000 index:0x0 pfn:... ... page dumped because: bad pte ... file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0 ... Call Trace: <TASK> dump_stack_lvl+0x46/0x5e print_bad_pte.cold+0x66/0xb6 unmap_page_range+0x7e5/0xdc0 unmap_vmas+0x78/0xf0 unmap_region+0xa8/0x110 __do_munmap+0x1ea/0x4e0 __vm_munmap+0x75/0x120 __x64_sys_munmap+0x28/0x40 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x61/0xcb ... For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG) would print out the following and trigger a general protection fault in the affected Xen PV domain: (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ... (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ... As of this writing, gntdev_grant_map structure's vma field (referred to as map->vma below) is mainly used for checking the start and end addresses of mappings. However, with split VMAs, these may change, and there could be more than one VMA associated with a gntdev mapping. Hence, remove the use of map->vma and rely on map->pages_vm_start for the original start address and on (map->count << PAGE_SHIFT) for the original mapping size. Let the invalidate() and find_special_page() hooks use these. 
Also, given that there can be multiple VMAs associated with a gntdev mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to the end of gntdev_put_map, so that the MMU notifier is only removed after the closing of the last remaining VMA. Finally, use an atomic to prevent inadvertent gntdev mapping re-use, instead of using the map->live_grants atomic counter and/or the map->vma pointer (the latter of which is now removed). This prevents the userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the same address range as a previously set up gntdev mapping. This scenario can be summarized with the following call-trace, which was valid prior to this commit: mmap gntdev_mmap mmap (repeat mmap with MAP_FIXED over the same address range) gntdev_invalidate unmap_grant_pages (sets 'being_removed' entries to true) gnttab_unmap_refs_async unmap_single_vma gntdev_mmap (maps the shared pages again) munmap gntdev_invalidate unmap_grant_pages (no-op because 'being_removed' entries are true) unmap_single_vma (For PV domains, Xen reports that a granted page is being unmapped and triggers a general protection fault in the affected domain, if Xen was built with CONFIG_DEBUG) The fix for this last scenario could be worth its own commit, but we opted for a single commit, because removing the gntdev_grant_map structure's vma field requires guarding the entry to gntdev_mmap(), and the live_grants atomic counter is not sufficient on its own to prevent the mmap() over a pre-existing mapping. Link: QubesOS/qubes-issues#7631 Fixes: ab31523 ("xen/gntdev: allow usermode to map granted pages") Cc: [email protected] Signed-off-by: M. Vefa Bicakci <[email protected]> Reviewed-by: Juergen Gross <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Juergen Gross <[email protected]>
1 parent 0991028 commit 5c13a4a

File tree

2 files changed

+27
-34
lines changed

2 files changed

+27
-34
lines changed

drivers/xen/gntdev-common.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
4444
};
4545

4646
struct gntdev_grant_map {
47+
atomic_t in_use;
4748
struct mmu_interval_notifier notifier;
49+
bool notifier_init;
4850
struct list_head next;
49-
struct vm_area_struct *vma;
5051
int index;
5152
int count;
5253
int flags;

drivers/xen/gntdev.c

Lines changed: 25 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
286286
*/
287287
}
288288

289+
if (use_ptemod && map->notifier_init)
290+
mmu_interval_notifier_remove(&map->notifier);
291+
289292
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
290293
notify_remote_via_evtchn(map->notify.event);
291294
evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
298301
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
299302
{
300303
struct gntdev_grant_map *map = data;
301-
unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
304+
unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
302305
int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
303306
(1 << _GNTMAP_guest_avail0);
304307
u64 pte_maddr;
@@ -508,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
508511
struct gntdev_priv *priv = file->private_data;
509512

510513
pr_debug("gntdev_vma_close %p\n", vma);
511-
if (use_ptemod) {
512-
WARN_ON(map->vma != vma);
513-
mmu_interval_notifier_remove(&map->notifier);
514-
map->vma = NULL;
515-
}
514+
516515
vma->vm_private_data = NULL;
517516
gntdev_put_map(priv, map);
518517
}
@@ -540,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
540539
struct gntdev_grant_map *map =
541540
container_of(mn, struct gntdev_grant_map, notifier);
542541
unsigned long mstart, mend;
542+
unsigned long map_start, map_end;
543543

544544
if (!mmu_notifier_range_blockable(range))
545545
return false;
546546

547+
map_start = map->pages_vm_start;
548+
map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
549+
547550
/*
548551
* If the VMA is split or otherwise changed the notifier is not
549552
* updated, but we don't want to process VA's outside the modified
550553
* VMA. FIXME: It would be much more understandable to just prevent
551554
* modifying the VMA in the first place.
552555
*/
553-
if (map->vma->vm_start >= range->end ||
554-
map->vma->vm_end <= range->start)
556+
if (map_start >= range->end || map_end <= range->start)
555557
return true;
556558

557-
mstart = max(range->start, map->vma->vm_start);
558-
mend = min(range->end, map->vma->vm_end);
559+
mstart = max(range->start, map_start);
560+
mend = min(range->end, map_end);
559561
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
560-
map->index, map->count,
561-
map->vma->vm_start, map->vma->vm_end,
562-
range->start, range->end, mstart, mend);
563-
unmap_grant_pages(map,
564-
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
565-
(mend - mstart) >> PAGE_SHIFT);
562+
map->index, map->count, map_start, map_end,
563+
range->start, range->end, mstart, mend);
564+
unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
565+
(mend - mstart) >> PAGE_SHIFT);
566566

567567
return true;
568568
}
@@ -1042,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
10421042
return -EINVAL;
10431043

10441044
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
1045-
index, count, vma->vm_start, vma->vm_pgoff);
1045+
index, count, vma->vm_start, vma->vm_pgoff);
10461046

10471047
mutex_lock(&priv->lock);
10481048
map = gntdev_find_map_index(priv, index, count);
10491049
if (!map)
10501050
goto unlock_out;
1051-
if (use_ptemod && map->vma)
1052-
goto unlock_out;
1053-
if (atomic_read(&map->live_grants)) {
1054-
err = -EAGAIN;
1051+
if (!atomic_add_unless(&map->in_use, 1, 1))
10551052
goto unlock_out;
1056-
}
1053+
10571054
refcount_inc(&map->users);
10581055

10591056
vma->vm_ops = &gntdev_vmops;
@@ -1074,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
10741071
map->flags |= GNTMAP_readonly;
10751072
}
10761073

1074+
map->pages_vm_start = vma->vm_start;
1075+
10771076
if (use_ptemod) {
1078-
map->vma = vma;
10791077
err = mmu_interval_notifier_insert_locked(
10801078
&map->notifier, vma->vm_mm, vma->vm_start,
10811079
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
1082-
if (err) {
1083-
map->vma = NULL;
1080+
if (err)
10841081
goto out_unlock_put;
1085-
}
1082+
1083+
map->notifier_init = true;
10861084
}
10871085
mutex_unlock(&priv->lock);
10881086

@@ -1099,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
10991097
*/
11001098
mmu_interval_read_begin(&map->notifier);
11011099

1102-
map->pages_vm_start = vma->vm_start;
11031100
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
11041101
vma->vm_end - vma->vm_start,
11051102
find_grant_ptes, map);
@@ -1128,13 +1125,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
11281125
out_unlock_put:
11291126
mutex_unlock(&priv->lock);
11301127
out_put_map:
1131-
if (use_ptemod) {
1128+
if (use_ptemod)
11321129
unmap_grant_pages(map, 0, map->count);
1133-
if (map->vma) {
1134-
mmu_interval_notifier_remove(&map->notifier);
1135-
map->vma = NULL;
1136-
}
1137-
}
11381130
gntdev_put_map(priv, map);
11391131
return err;
11401132
}

0 commit comments

Comments
 (0)