Skip to content

Commit 38620fc

Browse files
royger authored and jgross1 committed
x86/xen: attempt to inflate the memory balloon on PVH
When running as PVH or HVM, Linux will use holes in the memory map as scratch space to map grants, foreign domain pages and possibly miscellaneous other stuff. However, the usage of such memory map holes for Xen purposes can be problematic. The requests for holes by Xen happen quite early in the kernel boot process (grant table setup already uses scratch map space), and it's possible that by then not all devices have reclaimed their MMIO space. It's not unlikely for chunks of Xen scratch map space to end up using PCI bridge MMIO window memory, which (as expected) causes quite a lot of issues in the system. At least for PVH dom0 we have the possibility of using regions marked as UNUSABLE in the e820 memory map. Whether the region is UNUSABLE in the native memory map, or it has been converted into UNUSABLE in order to hide RAM regions from dom0, the second stage translation page-tables can populate those areas without issues. PV already has this kind of logic, where the balloon driver is inflated at boot. Re-use the current logic in order to also inflate it when running as PVH. Convert UNUSABLE regions up to the ratio specified in EXTRA_MEM_RATIO to RAM, while reserving them using xen_add_extra_mem() (which is also moved so it's no longer tied to CONFIG_PV). [jgross: fixed build for CONFIG_PVH without CONFIG_XEN_PVH] Signed-off-by: Roger Pau Monné <[email protected]> Reviewed-by: Juergen Gross <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Juergen Gross <[email protected]>
1 parent cc87253 commit 38620fc

File tree

7 files changed

+122
-46
lines changed

7 files changed

+122
-46
lines changed

arch/x86/include/asm/xen/hypervisor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ void xen_arch_unregister_cpu(int num);
6262
#ifdef CONFIG_PVH
6363
void __init xen_pvh_init(struct boot_params *boot_params);
6464
void __init mem_map_via_hcall(struct boot_params *boot_params_p);
65+
#ifdef CONFIG_XEN_PVH
66+
void __init xen_reserve_extra_memory(struct boot_params *bootp);
67+
#else
68+
static inline void xen_reserve_extra_memory(struct boot_params *bootp) { }
69+
#endif
6570
#endif
6671

6772
/* Lazy mode for batching updates / context switch */

arch/x86/platform/pvh/enlighten.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ static void __init init_pvh_bootparams(bool xen_guest)
7474
} else
7575
xen_raw_printk("Warning: Can fit ISA range into e820\n");
7676

77+
if (xen_guest)
78+
xen_reserve_extra_memory(&pvh_bootparams);
79+
7780
pvh_bootparams.hdr.cmd_line_ptr =
7881
pvh_start_info.cmdline_paddr;
7982

arch/x86/xen/enlighten.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <linux/console.h>
77
#include <linux/cpu.h>
88
#include <linux/kexec.h>
9+
#include <linux/memblock.h>
910
#include <linux/slab.h>
1011
#include <linux/panic_notifier.h>
1112

@@ -350,3 +351,34 @@ void xen_arch_unregister_cpu(int num)
350351
}
351352
EXPORT_SYMBOL(xen_arch_unregister_cpu);
352353
#endif
354+
355+
/* Amount of extra memory space we add to the e820 ranges */
356+
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
357+
358+
/*
 * Record a range of extra (balloon) memory and reserve it in memblock.
 *
 * @start_pfn: first page frame of the range
 * @n_pfns:    number of page frames in the range
 *
 * The range is stored in the first unused slot of xen_extra_mem[] (a slot
 * with n_pfns == 0), or merged into the first slot that ends exactly at
 * @start_pfn.  If neither is possible a warning is printed.  The range is
 * passed to memblock_reserve() in every case, including when it could not
 * be recorded.
 */
void __init xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns)
{
	struct xen_memory_region *region = xen_extra_mem;
	struct xen_memory_region *const end =
		xen_extra_mem + XEN_EXTRA_MEM_MAX_REGIONS;

	/*
	 * A zero-sized request is not filtered out: it is expected to be
	 * rare and merely writes a slot that still counts as unused.
	 */
	for (; region < end; region++) {
		if (!region->n_pfns) {
			/* Free slot: start a new region here. */
			region->start_pfn = start_pfn;
			region->n_pfns = n_pfns;
			goto reserve;
		}

		if (region->start_pfn + region->n_pfns == start_pfn) {
			/* Range directly follows this region: grow it. */
			region->n_pfns += n_pfns;
			goto reserve;
		}
	}

	/* Every slot is in use and none could be extended. */
	printk(KERN_WARNING "Warning: not enough extra memory regions\n");

reserve:
	memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}

arch/x86/xen/enlighten_pvh.c

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0
22
#include <linux/acpi.h>
33
#include <linux/export.h>
4+
#include <linux/mm.h>
45

56
#include <xen/hvc-console.h>
67

@@ -72,3 +73,70 @@ void __init mem_map_via_hcall(struct boot_params *boot_params_p)
7273
}
7374
boot_params_p->e820_entries = memmap.nr_entries;
7475
}
76+
77+
/*
78+
* Reserve e820 UNUSABLE regions to inflate the memory balloon.
79+
*
80+
* On PVH dom0 the host memory map is used, RAM regions available to dom0 are
81+
* located as the same place as in the native memory map, but since dom0 gets
82+
* less memory than the total amount of host RAM the ranges that can't be
83+
* populated are converted from RAM -> UNUSABLE. Use such regions (up to the
84+
* ratio signaled in EXTRA_MEM_RATIO) in order to inflate the balloon driver at
85+
* boot. Doing so prevents the guest (even if just temporary) from using holes
86+
* in the memory map in order to map grants or foreign addresses, and
87+
* hopefully limits the risk of a clash with a device MMIO region. Ideally the
88+
* hypervisor should notify us which memory ranges are suitable for creating
89+
* foreign mappings, but that's not yet implemented.
90+
*/
91+
void __init xen_reserve_extra_memory(struct boot_params *bootp)
92+
{
93+
unsigned int i, ram_pages = 0, extra_pages;
94+
95+
for (i = 0; i < bootp->e820_entries; i++) {
96+
struct boot_e820_entry *e = &bootp->e820_table[i];
97+
98+
if (e->type != E820_TYPE_RAM)
99+
continue;
100+
ram_pages += PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr);
101+
}
102+
103+
/* Max amount of extra memory. */
104+
extra_pages = EXTRA_MEM_RATIO * ram_pages;
105+
106+
/*
107+
* Convert UNUSABLE ranges to RAM and reserve them for foreign mapping
108+
* purposes.
109+
*/
110+
for (i = 0; i < bootp->e820_entries && extra_pages; i++) {
111+
struct boot_e820_entry *e = &bootp->e820_table[i];
112+
unsigned long pages;
113+
114+
if (e->type != E820_TYPE_UNUSABLE)
115+
continue;
116+
117+
pages = min(extra_pages,
118+
PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr));
119+
120+
if (pages != (PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr))) {
121+
struct boot_e820_entry *next;
122+
123+
if (bootp->e820_entries ==
124+
ARRAY_SIZE(bootp->e820_table))
125+
/* No space left to split - skip region. */
126+
continue;
127+
128+
/* Split entry. */
129+
next = e + 1;
130+
memmove(next, e,
131+
(bootp->e820_entries - i) * sizeof(*e));
132+
bootp->e820_entries++;
133+
next->addr = PAGE_ALIGN(e->addr) + PFN_PHYS(pages);
134+
e->size = next->addr - e->addr;
135+
next->size -= e->size;
136+
}
137+
e->type = E820_TYPE_RAM;
138+
extra_pages -= pages;
139+
140+
xen_add_extra_mem(PFN_UP(e->addr), pages);
141+
}
142+
}

arch/x86/xen/setup.c

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,6 @@
3838

3939
#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
4040

41-
/* Amount of extra memory space we add to the e820 ranges */
42-
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
43-
4441
/* Number of pages released from the initial allocation. */
4542
unsigned long xen_released_pages;
4643

@@ -64,18 +61,6 @@ static struct {
6461
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
6562
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
6663

67-
/*
68-
* The maximum amount of extra memory compared to the base size. The
69-
* main scaling factor is the size of struct page. At extreme ratios
70-
* of base:extra, all the base memory can be filled with page
71-
* structures for the extra memory, leaving no space for anything
72-
* else.
73-
*
74-
* 10x seems like a reasonable balance between scaling flexibility and
75-
* leaving a practically usable system.
76-
*/
77-
#define EXTRA_MEM_RATIO (10)
78-
7964
static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
8065

8166
static void __init xen_parse_512gb(void)
@@ -96,35 +81,6 @@ static void __init xen_parse_512gb(void)
9681
xen_512gb_limit = val;
9782
}
9883

99-
static void __init xen_add_extra_mem(unsigned long start_pfn,
100-
unsigned long n_pfns)
101-
{
102-
int i;
103-
104-
/*
105-
* No need to check for zero size, should happen rarely and will only
106-
* write a new entry regarded to be unused due to zero size.
107-
*/
108-
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
109-
/* Add new region. */
110-
if (xen_extra_mem[i].n_pfns == 0) {
111-
xen_extra_mem[i].start_pfn = start_pfn;
112-
xen_extra_mem[i].n_pfns = n_pfns;
113-
break;
114-
}
115-
/* Append to existing region. */
116-
if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
117-
start_pfn) {
118-
xen_extra_mem[i].n_pfns += n_pfns;
119-
break;
120-
}
121-
}
122-
if (i == XEN_EXTRA_MEM_MAX_REGIONS)
123-
printk(KERN_WARNING "Warning: not enough extra memory regions\n");
124-
125-
memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
126-
}
127-
12884
static void __init xen_del_extra_mem(unsigned long start_pfn,
12985
unsigned long n_pfns)
13086
{

arch/x86/xen/xen-ops.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,4 +163,18 @@ void xen_hvm_post_suspend(int suspend_cancelled);
163163
static inline void xen_hvm_post_suspend(int suspend_cancelled) {}
164164
#endif
165165

166+
/*
167+
* The maximum amount of extra memory compared to the base size. The
168+
* main scaling factor is the size of struct page. At extreme ratios
169+
* of base:extra, all the base memory can be filled with page
170+
* structures for the extra memory, leaving no space for anything
171+
* else.
172+
*
173+
* 10x seems like a reasonable balance between scaling flexibility and
174+
* leaving a practically usable system.
175+
*/
176+
#define EXTRA_MEM_RATIO (10)
177+
178+
void xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns);
179+
166180
#endif /* XEN_OPS_H */

drivers/xen/balloon.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,6 @@ EXPORT_SYMBOL(xen_free_ballooned_pages);
672672

673673
static void __init balloon_add_regions(void)
674674
{
675-
#if defined(CONFIG_XEN_PV)
676675
unsigned long start_pfn, pages;
677676
unsigned long pfn, extra_pfn_end;
678677
unsigned int i;
@@ -696,7 +695,6 @@ static void __init balloon_add_regions(void)
696695

697696
balloon_stats.total_pages += extra_pfn_end - start_pfn;
698697
}
699-
#endif
700698
}
701699

702700
static int __init balloon_init(void)

0 commit comments

Comments
 (0)