Skip to content

Commit e9ba21f

Browse files
committed
Merge tag 'kvm-s390-next-6.16-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
* Fix interaction between some filesystems and Secure Execution
* Some cleanups and refactorings, preparing for an upcoming big series
2 parents 8e86e73 + d6c8097 commit e9ba21f

25 files changed

+482
-392
lines changed

MAINTAINERS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13102,12 +13102,14 @@ S: Supported
1310213102
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
1310313103
F: Documentation/virt/kvm/s390*
1310413104
F: arch/s390/include/asm/gmap.h
13105+
F: arch/s390/include/asm/gmap_helpers.h
1310513106
F: arch/s390/include/asm/kvm*
1310613107
F: arch/s390/include/uapi/asm/kvm*
1310713108
F: arch/s390/include/uapi/asm/uvdevice.h
1310813109
F: arch/s390/kernel/uv.c
1310913110
F: arch/s390/kvm/
1311013111
F: arch/s390/mm/gmap.c
13112+
F: arch/s390/mm/gmap_helpers.c
1311113113
F: drivers/s390/char/uvdevice.c
1311213114
F: tools/testing/selftests/drivers/s390x/uvdevice/
1311313115
F: tools/testing/selftests/kvm/*/s390/

arch/s390/include/asm/gmap.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
110110
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
111111
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
112112
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
113-
void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
114113
void __gmap_zap(struct gmap *, unsigned long gaddr);
115114
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
116115

@@ -134,7 +133,6 @@ int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned
134133

135134
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
136135
unsigned long gaddr, unsigned long vmaddr);
137-
int s390_disable_cow_sharing(void);
138136
int s390_replace_asce(struct gmap *gmap);
139137
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
140138
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,

arch/s390/include/asm/gmap_helpers.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Helper functions for KVM guest address space mapping code
4+
*
5+
* Copyright IBM Corp. 2025
6+
*/
7+
8+
#ifndef _ASM_S390_GMAP_HELPERS_H
9+
#define _ASM_S390_GMAP_HELPERS_H
10+
11+
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
12+
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
13+
int gmap_helper_disable_cow_sharing(void);
14+
15+
#endif /* _ASM_S390_GMAP_HELPERS_H */

arch/s390/include/asm/tlb.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
3636

3737
#include <asm/tlbflush.h>
3838
#include <asm-generic/tlb.h>
39+
#include <asm/gmap.h>
3940

4041
/*
4142
* Release the page cache reference for a pte removed by

arch/s390/include/asm/uv.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include <linux/bug.h>
1717
#include <linux/sched.h>
1818
#include <asm/page.h>
19-
#include <asm/gmap.h>
2019
#include <asm/asm.h>
2120

2221
#define UVC_CC_OK 0

arch/s390/kernel/uv.c

Lines changed: 78 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <linux/pagemap.h>
1616
#include <linux/swap.h>
1717
#include <linux/pagewalk.h>
18+
#include <linux/backing-dev.h>
1819
#include <asm/facility.h>
1920
#include <asm/sections.h>
2021
#include <asm/uv.h>
@@ -135,7 +136,7 @@ int uv_destroy_folio(struct folio *folio)
135136
{
136137
int rc;
137138

138-
/* See gmap_make_secure(): large folios cannot be secure */
139+
/* Large folios cannot be secure */
139140
if (unlikely(folio_test_large(folio)))
140141
return 0;
141142

@@ -184,7 +185,7 @@ int uv_convert_from_secure_folio(struct folio *folio)
184185
{
185186
int rc;
186187

187-
/* See gmap_make_secure(): large folios cannot be secure */
188+
/* Large folios cannot be secure */
188189
if (unlikely(folio_test_large(folio)))
189190
return 0;
190191

@@ -324,32 +325,87 @@ static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct u
324325
}
325326

326327
/**
327-
* s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split.
328+
* s390_wiggle_split_folio() - try to drain extra references to a folio and
329+
* split the folio if it is large.
328330
* @mm: the mm containing the folio to work on
329331
* @folio: the folio
330-
* @split: whether to split a large folio
331332
*
332333
* Context: Must be called while holding an extra reference to the folio;
333334
* the mm lock should not be held.
334-
* Return: 0 if the folio was split successfully;
335-
* -EAGAIN if the folio was not split successfully but another attempt
336-
* can be made, or if @split was set to false;
337-
* -EINVAL in case of other errors. See split_folio().
335+
* Return: 0 if the operation was successful;
336+
* -EAGAIN if splitting the large folio was not successful,
337+
* but another attempt can be made;
338+
* -EINVAL in case of other folio splitting errors. See split_folio().
338339
*/
339-
static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
340+
static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
340341
{
341-
int rc;
342+
int rc, tried_splits;
342343

343344
lockdep_assert_not_held(&mm->mmap_lock);
344345
folio_wait_writeback(folio);
345346
lru_add_drain_all();
346-
if (split) {
347+
348+
if (!folio_test_large(folio))
349+
return 0;
350+
351+
for (tried_splits = 0; tried_splits < 2; tried_splits++) {
352+
struct address_space *mapping;
353+
loff_t lstart, lend;
354+
struct inode *inode;
355+
347356
folio_lock(folio);
348357
rc = split_folio(folio);
358+
if (rc != -EBUSY) {
359+
folio_unlock(folio);
360+
return rc;
361+
}
362+
363+
/*
364+
* Splitting with -EBUSY can fail for various reasons, but we
365+
* have to handle one case explicitly for now: some mappings
366+
* don't allow for splitting dirty folios; writeback will
367+
* mark them clean again, including marking all page table
368+
* entries mapping the folio read-only, to catch future write
369+
* attempts.
370+
*
371+
* While the system should be writing back dirty folios in the
372+
* background, we obtained this folio by looking up a writable
373+
* page table entry. On these problematic mappings, writable
374+
* page table entries imply dirty folios, preventing the
375+
* split in the first place.
376+
*
377+
* To prevent a livelock when triggering writeback manually and
378+
* letting the caller look up the folio again in the page
379+
* table (turning it dirty), immediately try to split again.
380+
*
381+
* This is only a problem for some mappings (e.g., XFS);
382+
* mappings that do not support writeback (e.g., shmem) do not
383+
* apply.
384+
*/
385+
if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
386+
!folio->mapping || !mapping_can_writeback(folio->mapping)) {
387+
folio_unlock(folio);
388+
break;
389+
}
390+
391+
/*
392+
* Ideally, we'd only trigger writeback on this exact folio. But
393+
* there is no easy way to do that, so we'll stabilize the
394+
* mapping while we still hold the folio lock, so we can drop
395+
* the folio lock to trigger writeback on the range currently
396+
* covered by the folio instead.
397+
*/
398+
mapping = folio->mapping;
399+
lstart = folio_pos(folio);
400+
lend = lstart + folio_size(folio) - 1;
401+
inode = igrab(mapping->host);
349402
folio_unlock(folio);
350403

351-
if (rc != -EBUSY)
352-
return rc;
404+
if (unlikely(!inode))
405+
break;
406+
407+
filemap_write_and_wait_range(mapping, lstart, lend);
408+
iput(mapping->host);
353409
}
354410
return -EAGAIN;
355411
}
@@ -393,8 +449,11 @@ int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header
393449
folio_walk_end(&fw, vma);
394450
mmap_read_unlock(mm);
395451

396-
if (rc == -E2BIG || rc == -EBUSY)
397-
rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG);
452+
if (rc == -E2BIG || rc == -EBUSY) {
453+
rc = s390_wiggle_split_folio(mm, folio);
454+
if (!rc)
455+
rc = -EAGAIN;
456+
}
398457
folio_put(folio);
399458

400459
return rc;
@@ -403,15 +462,15 @@ EXPORT_SYMBOL_GPL(make_hva_secure);
403462

404463
/*
405464
* To be called with the folio locked or with an extra reference! This will
406-
* prevent gmap_make_secure from touching the folio concurrently. Having 2
407-
* parallel arch_make_folio_accessible is fine, as the UV calls will become a
408-
* no-op if the folio is already exported.
465+
* prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
466+
* Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will
467+
* become a no-op if the folio is already exported.
409468
*/
410469
int arch_make_folio_accessible(struct folio *folio)
411470
{
412471
int rc = 0;
413472

414-
/* See gmap_make_secure(): large folios cannot be secure */
473+
/* Large folios cannot be secure */
415474
if (unlikely(folio_test_large(folio)))
416475
return 0;
417476

arch/s390/kvm/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
88
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
99

1010
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
11-
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o
11+
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
1212

1313
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
1414
obj-$(CONFIG_KVM) += kvm.o

arch/s390/kvm/diag.c

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,30 @@
1111
#include <linux/kvm.h>
1212
#include <linux/kvm_host.h>
1313
#include <asm/gmap.h>
14+
#include <asm/gmap_helpers.h>
1415
#include <asm/virtio-ccw.h>
1516
#include "kvm-s390.h"
1617
#include "trace.h"
1718
#include "trace-s390.h"
1819
#include "gaccess.h"
1920

21+
static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
22+
{
23+
struct kvm_memslot_iter iter;
24+
struct kvm_memory_slot *slot;
25+
struct kvm_memslots *slots;
26+
unsigned long start, end;
27+
28+
slots = kvm_vcpu_memslots(vcpu);
29+
30+
kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
31+
slot = iter.slot;
32+
start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn));
33+
end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages));
34+
gmap_helper_discard(vcpu->kvm->mm, start, end);
35+
}
36+
}
37+
2038
static int diag_release_pages(struct kvm_vcpu *vcpu)
2139
{
2240
unsigned long start, end;
@@ -32,26 +50,28 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
3250

3351
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
3452

53+
mmap_read_lock(vcpu->kvm->mm);
3554
/*
3655
* We checked for start >= end above, so lets check for the
3756
* fast path (no prefix swap page involved)
3857
*/
3958
if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
40-
gmap_discard(vcpu->arch.gmap, start, end);
59+
do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end));
4160
} else {
4261
/*
4362
* This is slow path. gmap_discard will check for start
4463
* so lets split this into before prefix, prefix, after
4564
* prefix and let gmap_discard make some of these calls
4665
* NOPs.
4766
*/
48-
gmap_discard(vcpu->arch.gmap, start, prefix);
67+
do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix));
4968
if (start <= prefix)
50-
gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
69+
do_discard_gfn_range(vcpu, 0, 1);
5170
if (end > prefix + PAGE_SIZE)
52-
gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
53-
gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
71+
do_discard_gfn_range(vcpu, 1, 2);
72+
do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end));
5473
}
74+
mmap_read_unlock(vcpu->kvm->mm);
5575
return 0;
5676
}
5777

arch/s390/kvm/gaccess.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
#include <asm/gmap.h>
1717
#include <asm/dat-bits.h>
1818
#include "kvm-s390.h"
19-
#include "gmap.h"
2019
#include "gaccess.h"
2120

21+
#define GMAP_SHADOW_FAKE_TABLE 1ULL
22+
2223
/*
2324
* vaddress union in order to easily decode a virtual address into its
2425
* region first index, region second index etc. parts.

arch/s390/kvm/gmap-vsie.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include <asm/uv.h>
2323

2424
#include "kvm-s390.h"
25-
#include "gmap.h"
2625

2726
/**
2827
* gmap_find_shadow - find a specific asce in the list of shadow tables

0 commit comments

Comments (0)