
Commit 382b5b8

Authored and committed by Marc Zyngier
Merge branch kvm-arm64/mte-map-shared into kvmarm-master/next
* kvm-arm64/mte-map-shared:
  : .
  : Update the MTE support to allow the VMM to use shared mappings
  : to back the memslots exposed to MTE-enabled guests.
  :
  : Patches courtesy of Catalin Marinas and Peter Collingbourne.
  : .
  : Fix a number of issues with MTE, such as races on the tags
  : being initialised vs the PG_mte_tagged flag as well as the
  : lack of support for VM_SHARED when KVM is involved.
  :
  : Patches from Catalin Marinas and Peter Collingbourne.
  : .
  Documentation: document the ABI changes for KVM_CAP_ARM_MTE
  KVM: arm64: permit all VM_MTE_ALLOWED mappings with MTE enabled
  KVM: arm64: unify the tests for VMAs in memslots when MTE is enabled
  arm64: mte: Lock a page for MTE tag initialisation
  mm: Add PG_arch_3 page flag
  KVM: arm64: Simplify the sanitise_mte_tags() logic
  arm64: mte: Fix/clarify the PG_mte_tagged semantics
  mm: Do not enable PG_arch_2 for all 64-bit architectures

Signed-off-by: Marc Zyngier <[email protected]>
2 parents: cfa7299 + a4baf8d

File tree

19 files changed: +152 / -79 lines


Documentation/virt/kvm/api.rst

Lines changed: 3 additions & 2 deletions
@@ -7385,8 +7385,9 @@ hibernation of the host; however the VMM needs to manually save/restore the
 tags as appropriate if the VM is migrated.

 When this capability is enabled all memory in memslots must be mapped as
-not-shareable (no MAP_SHARED), attempts to create a memslot with a
-MAP_SHARED mmap will result in an -EINVAL return.
+``MAP_ANONYMOUS`` or with a RAM-based file mapping (``tmpfs``, ``memfd``),
+attempts to create a memslot with an invalid mmap will result in an
+-EINVAL return.

 When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
 perform a bulk copy of tags to/from the guest.
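
In practical terms, this ABI change means a VMM can now back an MTE-enabled guest's memory with a shared, RAM-based mapping such as a memfd. A minimal userspace sketch, with error handling elided; the helper name and memslot parameters are illustrative and not part of this commit:

```c
#define _GNU_SOURCE             /* for memfd_create() (glibc >= 2.27) */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/kvm.h>

/* Back a memslot with a MAP_SHARED memfd mapping. Before this series,
 * KVM_SET_USER_MEMORY_REGION returned -EINVAL for any MAP_SHARED VMA
 * once KVM_CAP_ARM_MTE was enabled. */
static int back_memslot_with_memfd(int vm_fd, __u64 guest_phys, __u64 size)
{
        int memfd = memfd_create("guest-ram", 0);

        ftruncate(memfd, size);

        /* tmpfs/memfd VMAs are tag-capable (VM_MTE_ALLOWED), so
         * MAP_SHARED is now acceptable for an MTE-enabled guest. */
        void *ram = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_SHARED, memfd, 0);

        struct kvm_userspace_memory_region region = {
                .slot            = 0,
                .guest_phys_addr = guest_phys,
                .memory_size     = size,
                .userspace_addr  = (__u64)(unsigned long)ram,
        };

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
```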

arch/arm64/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -1965,6 +1965,7 @@ config ARM64_MTE
         depends on ARM64_PAN
         select ARCH_HAS_SUBPAGE_FAULTS
         select ARCH_USES_HIGH_VMA_FLAGS
+        select ARCH_USES_PG_ARCH_X
         help
           Memory Tagging (part of the ARMv8.5 Extensions) provides
           architectural support for run-time, always-on detection of

arch/arm64/include/asm/mte.h

Lines changed: 64 additions & 1 deletion
@@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from,
                                     unsigned long n);
 int mte_save_tags(struct page *page);
 void mte_save_page_tags(const void *page_addr, void *tag_storage);
-bool mte_restore_tags(swp_entry_t entry, struct page *page);
+void mte_restore_tags(swp_entry_t entry, struct page *page);
 void mte_restore_page_tags(void *page_addr, const void *tag_storage);
 void mte_invalidate_tags(int type, pgoff_t offset);
 void mte_invalidate_tags_area(int type);
@@ -36,6 +36,58 @@ void mte_free_tag_storage(char *storage);

 /* track which pages have valid allocation tags */
 #define PG_mte_tagged        PG_arch_2
+/* simple lock to avoid multiple threads tagging the same page */
+#define PG_mte_lock        PG_arch_3
+
+static inline void set_page_mte_tagged(struct page *page)
+{
+        /*
+         * Ensure that the tags written prior to this function are visible
+         * before the page flags update.
+         */
+        smp_wmb();
+        set_bit(PG_mte_tagged, &page->flags);
+}
+
+static inline bool page_mte_tagged(struct page *page)
+{
+        bool ret = test_bit(PG_mte_tagged, &page->flags);
+
+        /*
+         * If the page is tagged, ensure ordering with a likely subsequent
+         * read of the tags.
+         */
+        if (ret)
+                smp_rmb();
+        return ret;
+}
+
+/*
+ * Lock the page for tagging and return 'true' if the page can be tagged,
+ * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
+ * locking only happens once for page initialisation.
+ *
+ * The page MTE lock state:
+ *
+ *   Locked:    PG_mte_lock && !PG_mte_tagged
+ *   Unlocked:  !PG_mte_lock || PG_mte_tagged
+ *
+ * Acquire semantics only if the page is tagged (returning 'false').
+ */
+static inline bool try_page_mte_tagging(struct page *page)
+{
+        if (!test_and_set_bit(PG_mte_lock, &page->flags))
+                return true;
+
+        /*
+         * The tags are either being initialised or may have been initialised
+         * already. Check if the PG_mte_tagged flag has been set or wait
+         * otherwise.
+         */
+        smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged));
+
+        return false;
+}

 void mte_zero_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t old_pte, pte_t pte);
@@ -56,6 +108,17 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size);
 /* unused if !CONFIG_ARM64_MTE, silence the compiler */
 #define PG_mte_tagged        0

+static inline void set_page_mte_tagged(struct page *page)
+{
+}
+static inline bool page_mte_tagged(struct page *page)
+{
+        return false;
+}
+static inline bool try_page_mte_tagging(struct page *page)
+{
+        return false;
+}
 static inline void mte_zero_clear_page_tags(void *addr)
 {
 }
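
Taken together, the three helpers define a once-only tag-initialisation protocol: the winner of the PG_mte_lock bit initialises the tags and publishes them with set_page_mte_tagged(), whose smp_wmb() pairs with the smp_rmb() in page_mte_tagged(); a losing racer waits inside try_page_mte_tagging() until PG_mte_tagged appears, with acquire semantics. The caller pattern used throughout the rest of this series (see the cpufeature.c, mte.c and mmu.c hunks below) condenses to:

```c
/* Condensed caller pattern from this series: initialise a page's tags
 * exactly once, with concurrent initialisers excluded by PG_mte_lock. */
if (try_page_mte_tagging(page)) {
        /* won the lock: tags are uninitialised, so clear (or restore) them */
        mte_clear_page_tags(page_address(page));
        /* publish: the smp_wmb() inside orders the tag writes before the flag */
        set_page_mte_tagged(page);
}
/* in both branches the page's tags are now guaranteed initialised */
```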

arch/arm64/include/asm/pgtable.h

Lines changed: 2 additions & 2 deletions
@@ -1049,8 +1049,8 @@ static inline void arch_swap_invalidate_area(int type)
 #define __HAVE_ARCH_SWAP_RESTORE
 static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
 {
-        if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
-                set_bit(PG_mte_tagged, &folio->flags);
+        if (system_supports_mte())
+                mte_restore_tags(entry, &folio->page);
 }

 #endif /* CONFIG_ARM64_MTE */

arch/arm64/kernel/cpufeature.c

Lines changed: 3 additions & 1 deletion
@@ -2074,8 +2074,10 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
          * Clear the tags in the zero page. This needs to be done via the
          * linear map which has the Tagged attribute.
          */
-        if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags))
+        if (try_page_mte_tagging(ZERO_PAGE(0))) {
                 mte_clear_page_tags(lm_alias(empty_zero_page));
+                set_page_mte_tagged(ZERO_PAGE(0));
+        }

         kasan_init_hw_tags_cpu();
 }

arch/arm64/kernel/elfcore.c

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
                  * Pages mapped in user space as !pte_access_permitted() (e.g.
                  * PROT_EXEC only) may not have the PG_mte_tagged flag set.
                  */
-                if (!test_bit(PG_mte_tagged, &page->flags)) {
+                if (!page_mte_tagged(page)) {
                         put_page(page);
                         dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
                         continue;

arch/arm64/kernel/hibernate.c

Lines changed: 1 addition & 1 deletion
@@ -271,7 +271,7 @@ static int swsusp_mte_save_tags(void)
                 if (!page)
                         continue;

-                if (!test_bit(PG_mte_tagged, &page->flags))
+                if (!page_mte_tagged(page))
                         continue;

                 ret = save_tags(page, pfn);

arch/arm64/kernel/mte.c

Lines changed: 10 additions & 11 deletions
@@ -41,19 +41,17 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
         if (check_swap && is_swap_pte(old_pte)) {
                 swp_entry_t entry = pte_to_swp_entry(old_pte);

-                if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
-                        return;
+                if (!non_swap_entry(entry))
+                        mte_restore_tags(entry, page);
         }

         if (!pte_is_tagged)
                 return;

-        /*
-         * Test PG_mte_tagged again in case it was racing with another
-         * set_pte_at().
-         */
-        if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+        if (try_page_mte_tagging(page)) {
                 mte_clear_page_tags(page_address(page));
+                set_page_mte_tagged(page);
+        }
 }

 void mte_sync_tags(pte_t old_pte, pte_t pte)
@@ -69,9 +67,11 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)

         /* if PG_mte_tagged is set, tags have already been initialised */
         for (i = 0; i < nr_pages; i++, page++) {
-                if (!test_bit(PG_mte_tagged, &page->flags))
+                if (!page_mte_tagged(page)) {
                         mte_sync_page_tags(page, old_pte, check_swap,
                                            pte_is_tagged);
+                        set_page_mte_tagged(page);
+                }
         }

         /* ensure the tags are visible before the PTE is set */
@@ -96,8 +96,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
          * pages is tagged, set_pte_at() may zero or change the tags of the
          * other page via mte_sync_tags().
          */
-        if (test_bit(PG_mte_tagged, &page1->flags) ||
-            test_bit(PG_mte_tagged, &page2->flags))
+        if (page_mte_tagged(page1) || page_mte_tagged(page2))
                 return addr1 != addr2;

         return ret;
@@ -454,7 +453,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
                         put_page(page);
                         break;
                 }
-                WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));
+                WARN_ON_ONCE(!page_mte_tagged(page));

                 /* limit access to the end of the page */
                 offset = offset_in_page(addr);

arch/arm64/kvm/guest.c

Lines changed: 11 additions & 7 deletions
@@ -1059,7 +1059,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
                 maddr = page_address(page);

                 if (!write) {
-                        if (test_bit(PG_mte_tagged, &page->flags))
+                        if (page_mte_tagged(page))
                                 num_tags = mte_copy_tags_to_user(tags, maddr,
                                                         MTE_GRANULES_PER_PAGE);
                         else
@@ -1068,15 +1068,19 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
                                         clear_user(tags, MTE_GRANULES_PER_PAGE);
                         kvm_release_pfn_clean(pfn);
                 } else {
+                        /*
+                         * Only locking to serialise with a concurrent
+                         * set_pte_at() in the VMM but still overriding the
+                         * tags, hence ignoring the return value.
+                         */
+                        try_page_mte_tagging(page);
                         num_tags = mte_copy_tags_from_user(maddr, tags,
                                                         MTE_GRANULES_PER_PAGE);

-                        /*
-                         * Set the flag after checking the write
-                         * completed fully
-                         */
-                        if (num_tags == MTE_GRANULES_PER_PAGE)
-                                set_bit(PG_mte_tagged, &page->flags);
+                        /* uaccess failed, don't leave stale tags */
+                        if (num_tags != MTE_GRANULES_PER_PAGE)
+                                mte_clear_page_tags(maddr);
+                        set_page_mte_tagged(page);

                         kvm_release_pfn_dirty(pfn);
                 }
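
The write path above is what a VMM reaches through the KVM_ARM_MTE_COPY_TAGS ioctl, for instance when restoring tags on the destination of a migration. A sketch, assuming the arm64 KVM UAPI names (struct kvm_arm_copy_mte_tags, KVM_ARM_TAGS_TO_GUEST) and a 4K page size; the helper itself is hypothetical:

```c
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Push one page's worth of MTE tags into the guest. MTE stores one tag
 * per 16-byte granule, so tag_buf holds 4096 / 16 = 256 tag bytes.
 * Returns the number of bytes handled, or a negative errno. */
static long restore_page_tags(int vm_fd, __u64 guest_ipa, void *tag_buf)
{
        struct kvm_arm_copy_mte_tags copy = {
                .guest_ipa = guest_ipa,   /* must be page-aligned */
                .length    = 4096,        /* guest bytes, not tag bytes */
                .addr      = tag_buf,
                .flags     = KVM_ARM_TAGS_TO_GUEST,
        };

        return ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
}
```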

arch/arm64/kvm/mmu.c

Lines changed: 22 additions & 33 deletions
@@ -1164,32 +1164,26 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
  * - mmap_lock protects between a VM faulting a page in and the VMM performing
  *   an mprotect() to add VM_MTE
  */
-static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
-                             unsigned long size)
+static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+                              unsigned long size)
 {
         unsigned long i, nr_pages = size >> PAGE_SHIFT;
-        struct page *page;
+        struct page *page = pfn_to_page(pfn);

         if (!kvm_has_mte(kvm))
-                return 0;
-
-        /*
-         * pfn_to_online_page() is used to reject ZONE_DEVICE pages
-         * that may not support tags.
-         */
-        page = pfn_to_online_page(pfn);
-
-        if (!page)
-                return -EFAULT;
+                return;

         for (i = 0; i < nr_pages; i++, page++) {
-                if (!test_bit(PG_mte_tagged, &page->flags)) {
+                if (try_page_mte_tagging(page)) {
                         mte_clear_page_tags(page_address(page));
-                        set_bit(PG_mte_tagged, &page->flags);
+                        set_page_mte_tagged(page);
                 }
         }
+}

-        return 0;
+static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
+{
+        return vma->vm_flags & VM_MTE_ALLOWED;
 }

 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
@@ -1200,7 +1194,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         bool write_fault, writable, force_pte = false;
         bool exec_fault;
         bool device = false;
-        bool shared;
         unsigned long mmu_seq;
         struct kvm *kvm = vcpu->kvm;
         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -1247,8 +1240,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 vma_shift = get_vma_page_shift(vma, hva);
         }

-        shared = (vma->vm_flags & VM_SHARED);
-
         switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
         case PUD_SHIFT:
13601351
}
13611352

13621353
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
1363-
/* Check the VMM hasn't introduced a new VM_SHARED VMA */
1364-
if (!shared)
1365-
ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
1366-
else
1354+
/* Check the VMM hasn't introduced a new disallowed VMA */
1355+
if (kvm_vma_mte_allowed(vma)) {
1356+
sanitise_mte_tags(kvm, pfn, vma_pagesize);
1357+
} else {
13671358
ret = -EFAULT;
1368-
if (ret)
13691359
goto out_unlock;
1360+
}
13701361
}
13711362

13721363
if (writable)
@@ -1582,15 +1573,18 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
15821573
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
15831574
{
15841575
kvm_pfn_t pfn = pte_pfn(range->pte);
1585-
int ret;
15861576

15871577
if (!kvm->arch.mmu.pgt)
15881578
return false;
15891579

15901580
WARN_ON(range->end - range->start != 1);
15911581

1592-
ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
1593-
if (ret)
1582+
/*
1583+
* If the page isn't tagged, defer to user_mem_abort() for sanitising
1584+
* the MTE tags. The S2 pte should have been unmapped by
1585+
* mmu_notifier_invalidate_range_end().
1586+
*/
1587+
if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
15941588
return false;
15951589

15961590
/*
@@ -1822,12 +1816,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
18221816
if (!vma)
18231817
break;
18241818

1825-
/*
1826-
* VM_SHARED mappings are not allowed with MTE to avoid races
1827-
* when updating the PG_mte_tagged page flag, see
1828-
* sanitise_mte_tags for more details.
1829-
*/
1830-
if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
1819+
if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) {
18311820
ret = -EINVAL;
18321821
break;
18331822
}
