Skip to content

Commit bf49169

Browse files
rikvanriel authored and akpm00 committed
hugetlbfs: extend hugetlb_vma_lock to private VMAs
Extend the locking scheme used to protect shared hugetlb mappings from
truncate vs page fault races, in order to protect private hugetlb
mappings (with resv_map) against MADV_DONTNEED.

Add a read-write semaphore to the resv_map data structure, and use that
from the hugetlb_vma_(un)lock_* functions, in preparation for closing
the race between MADV_DONTNEED and page faults.

Link: https://lkml.kernel.org/r/[email protected]
Fixes: 04ada09 ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing")
Signed-off-by: Rik van Riel <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Muchun Song <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 92fe9dc commit bf49169

File tree

2 files changed

+43
-4
lines changed

2 files changed

+43
-4
lines changed

include/linux/hugetlb.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct resv_map {
6060
long adds_in_progress;
6161
struct list_head region_cache;
6262
long region_cache_count;
63+
struct rw_semaphore rw_sema;
6364
#ifdef CONFIG_CGROUP_HUGETLB
6465
/*
6566
* On private mappings, the counter to uncharge reservations is stored
@@ -1233,6 +1234,11 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
12331234
return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
12341235
}
12351236

1237+
static inline bool __vma_private_lock(struct vm_area_struct *vma)
1238+
{
1239+
return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data;
1240+
}
1241+
12361242
/*
12371243
* Safe version of huge_pte_offset() to check the locks. See comments
12381244
* above huge_pte_offset().

mm/hugetlb.c

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
9797
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
9898
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
9999
unsigned long start, unsigned long end);
100+
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
100101

101102
static inline bool subpool_is_free(struct hugepage_subpool *spool)
102103
{
@@ -267,6 +268,10 @@ void hugetlb_vma_lock_read(struct vm_area_struct *vma)
267268
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
268269

269270
down_read(&vma_lock->rw_sema);
271+
} else if (__vma_private_lock(vma)) {
272+
struct resv_map *resv_map = vma_resv_map(vma);
273+
274+
down_read(&resv_map->rw_sema);
270275
}
271276
}
272277

@@ -276,6 +281,10 @@ void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
276281
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
277282

278283
up_read(&vma_lock->rw_sema);
284+
} else if (__vma_private_lock(vma)) {
285+
struct resv_map *resv_map = vma_resv_map(vma);
286+
287+
up_read(&resv_map->rw_sema);
279288
}
280289
}
281290

@@ -285,6 +294,10 @@ void hugetlb_vma_lock_write(struct vm_area_struct *vma)
285294
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
286295

287296
down_write(&vma_lock->rw_sema);
297+
} else if (__vma_private_lock(vma)) {
298+
struct resv_map *resv_map = vma_resv_map(vma);
299+
300+
down_write(&resv_map->rw_sema);
288301
}
289302
}
290303

@@ -294,17 +307,27 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
294307
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
295308

296309
up_write(&vma_lock->rw_sema);
310+
} else if (__vma_private_lock(vma)) {
311+
struct resv_map *resv_map = vma_resv_map(vma);
312+
313+
up_write(&resv_map->rw_sema);
297314
}
298315
}
299316

300317
int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
301318
{
302-
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
303319

304-
if (!__vma_shareable_lock(vma))
305-
return 1;
320+
if (__vma_shareable_lock(vma)) {
321+
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
306322

307-
return down_write_trylock(&vma_lock->rw_sema);
323+
return down_write_trylock(&vma_lock->rw_sema);
324+
} else if (__vma_private_lock(vma)) {
325+
struct resv_map *resv_map = vma_resv_map(vma);
326+
327+
return down_write_trylock(&resv_map->rw_sema);
328+
}
329+
330+
return 1;
308331
}
309332

310333
void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
@@ -313,6 +336,10 @@ void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
313336
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
314337

315338
lockdep_assert_held(&vma_lock->rw_sema);
339+
} else if (__vma_private_lock(vma)) {
340+
struct resv_map *resv_map = vma_resv_map(vma);
341+
342+
lockdep_assert_held(&resv_map->rw_sema);
316343
}
317344
}
318345

@@ -345,6 +372,11 @@ static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
345372
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
346373

347374
__hugetlb_vma_unlock_write_put(vma_lock);
375+
} else if (__vma_private_lock(vma)) {
376+
struct resv_map *resv_map = vma_resv_map(vma);
377+
378+
/* no free for anon vmas, but still need to unlock */
379+
up_write(&resv_map->rw_sema);
348380
}
349381
}
350382

@@ -1068,6 +1100,7 @@ struct resv_map *resv_map_alloc(void)
10681100
kref_init(&resv_map->refs);
10691101
spin_lock_init(&resv_map->lock);
10701102
INIT_LIST_HEAD(&resv_map->regions);
1103+
init_rwsem(&resv_map->rw_sema);
10711104

10721105
resv_map->adds_in_progress = 0;
10731106
/*

0 commit comments

Comments (0)