Skip to content

Commit 40fdea0

Browse files
jgross1 authored and bostrovs committed
xen/balloon: add late_initcall_sync() for initial ballooning done
When running as a PVH or HVM guest with actual memory < max memory, the hypervisor uses "populate on demand" (PoD) to allow the guest to balloon down from its maximum memory size. For this to work correctly the guest must not touch more memory pages than its target memory size, as otherwise the PoD cache will be exhausted and the guest is crashed as a result. In extreme cases, ballooning down might currently not have finished before the init process is started, and the init process can consume lots of memory. In order to avoid random boot crashes in such cases, add a late init call that waits until ballooning down has finished for PVH/HVM guests. Warn on the console if initial ballooning fails, and panic() after stalling for more than 3 minutes by default. Add a module parameter for changing this timeout. [boris: replaced pr_info() with pr_notice()] Cc: <[email protected]> Reported-by: Marek Marczykowski-Górecki <[email protected]> Signed-off-by: Juergen Gross <[email protected]> Link: https://lore.kernel.org/r/[email protected] Reviewed-by: Boris Ostrovsky <[email protected]> Signed-off-by: Boris Ostrovsky <[email protected]>
1 parent eae446b commit 40fdea0

File tree

2 files changed

+70
-23
lines changed

2 files changed

+70
-23
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6357,6 +6357,13 @@
63576357
improve timer resolution at the expense of processing
63586358
more timer interrupts.
63596359

6360+
xen.balloon_boot_timeout= [XEN]
6361+
The time (in seconds) to wait before giving up to boot
6362+
in case initial ballooning fails to free enough memory.
6363+
Applies only when running as HVM or PVH guest and
6364+
started with less memory configured than allowed at
6365+
max. Default is 180.
6366+
63606367
xen.event_eoi_delay= [XEN]
63616368
How long to delay EOI handling in case of event
63626369
storms (jiffies). Default is 10.

drivers/xen/balloon.c

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include <linux/percpu-defs.h>
5959
#include <linux/slab.h>
6060
#include <linux/sysctl.h>
61+
#include <linux/moduleparam.h>
6162

6263
#include <asm/page.h>
6364
#include <asm/tlb.h>
@@ -73,6 +74,12 @@
7374
#include <xen/page.h>
7475
#include <xen/mem-reservation.h>
7576

77+
#undef MODULE_PARAM_PREFIX
78+
#define MODULE_PARAM_PREFIX "xen."
79+
80+
static uint __read_mostly balloon_boot_timeout = 180;
81+
module_param(balloon_boot_timeout, uint, 0444);
82+
7683
static int xen_hotplug_unpopulated;
7784

7885
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
@@ -125,12 +132,12 @@ static struct ctl_table xen_root[] = {
125132
* BP_ECANCELED: error, balloon operation canceled.
126133
*/
127134

128-
enum bp_state {
135+
static enum bp_state {
129136
BP_DONE,
130137
BP_WAIT,
131138
BP_EAGAIN,
132139
BP_ECANCELED
133-
};
140+
} balloon_state = BP_DONE;
134141

135142
/* Main waiting point for xen-balloon thread. */
136143
static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);
@@ -199,18 +206,15 @@ static struct page *balloon_next_page(struct page *page)
199206
return list_entry(next, struct page, lru);
200207
}
201208

202-
static enum bp_state update_schedule(enum bp_state state)
209+
static void update_schedule(void)
203210
{
204-
if (state == BP_WAIT)
205-
return BP_WAIT;
206-
207-
if (state == BP_ECANCELED)
208-
return BP_ECANCELED;
211+
if (balloon_state == BP_WAIT || balloon_state == BP_ECANCELED)
212+
return;
209213

210-
if (state == BP_DONE) {
214+
if (balloon_state == BP_DONE) {
211215
balloon_stats.schedule_delay = 1;
212216
balloon_stats.retry_count = 1;
213-
return BP_DONE;
217+
return;
214218
}
215219

216220
++balloon_stats.retry_count;
@@ -219,15 +223,16 @@ static enum bp_state update_schedule(enum bp_state state)
219223
balloon_stats.retry_count > balloon_stats.max_retry_count) {
220224
balloon_stats.schedule_delay = 1;
221225
balloon_stats.retry_count = 1;
222-
return BP_ECANCELED;
226+
balloon_state = BP_ECANCELED;
227+
return;
223228
}
224229

225230
balloon_stats.schedule_delay <<= 1;
226231

227232
if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
228233
balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
229234

230-
return BP_EAGAIN;
235+
balloon_state = BP_EAGAIN;
231236
}
232237

233238
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
@@ -494,9 +499,9 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
494499
* Stop waiting if either state is BP_DONE and ballooning action is
495500
* needed, or if the credit has changed while state is not BP_DONE.
496501
*/
497-
static bool balloon_thread_cond(enum bp_state state, long credit)
502+
static bool balloon_thread_cond(long credit)
498503
{
499-
if (state == BP_DONE)
504+
if (balloon_state == BP_DONE)
500505
credit = 0;
501506

502507
return current_credit() != credit || kthread_should_stop();
@@ -510,13 +515,12 @@ static bool balloon_thread_cond(enum bp_state state, long credit)
510515
*/
511516
static int balloon_thread(void *unused)
512517
{
513-
enum bp_state state = BP_DONE;
514518
long credit;
515519
unsigned long timeout;
516520

517521
set_freezable();
518522
for (;;) {
519-
switch (state) {
523+
switch (balloon_state) {
520524
case BP_DONE:
521525
case BP_ECANCELED:
522526
timeout = 3600 * HZ;
@@ -532,7 +536,7 @@ static int balloon_thread(void *unused)
532536
credit = current_credit();
533537

534538
wait_event_freezable_timeout(balloon_thread_wq,
535-
balloon_thread_cond(state, credit), timeout);
539+
balloon_thread_cond(credit), timeout);
536540

537541
if (kthread_should_stop())
538542
return 0;
@@ -543,22 +547,23 @@ static int balloon_thread(void *unused)
543547

544548
if (credit > 0) {
545549
if (balloon_is_inflated())
546-
state = increase_reservation(credit);
550+
balloon_state = increase_reservation(credit);
547551
else
548-
state = reserve_additional_memory();
552+
balloon_state = reserve_additional_memory();
549553
}
550554

551555
if (credit < 0) {
552556
long n_pages;
553557

554558
n_pages = min(-credit, si_mem_available());
555-
state = decrease_reservation(n_pages, GFP_BALLOON);
556-
if (state == BP_DONE && n_pages != -credit &&
559+
balloon_state = decrease_reservation(n_pages,
560+
GFP_BALLOON);
561+
if (balloon_state == BP_DONE && n_pages != -credit &&
557562
n_pages < totalreserve_pages)
558-
state = BP_EAGAIN;
563+
balloon_state = BP_EAGAIN;
559564
}
560565

561-
state = update_schedule(state);
566+
update_schedule();
562567

563568
mutex_unlock(&balloon_mutex);
564569

@@ -765,3 +770,38 @@ static int __init balloon_init(void)
765770
return 0;
766771
}
767772
subsys_initcall(balloon_init);
773+
774+
/*
 * balloon_wait_finish - wait until the balloon credit is no longer negative.
 *
 * Registered via late_initcall_sync(). Returns -ENODEV when not running
 * on Xen. Returns 0 immediately for PV domains or when current_credit()
 * is zero. Otherwise polls current_credit() every HZ / 10 jiffies until
 * it is no longer negative, then returns 0.
 *
 * While polling, if balloon_state is BP_ECANCELED a warning is printed
 * once, and the kernel panics when the credit has not changed for
 * balloon_boot_timeout seconds (HZ * balloon_boot_timeout jiffies).
 */
static int __init balloon_wait_finish(void)
775+
{
776+
long credit, last_credit = 0;
777+
/* jiffies value recorded the last time the credit was seen to change */
unsigned long last_changed = 0;
778+
779+
if (!xen_domain())
780+
return -ENODEV;
781+
782+
/* PV guests don't need to wait. */
783+
if (xen_pv_domain() || !current_credit())
784+
return 0;
785+
786+
pr_notice("Waiting for initial ballooning down having finished.\n");
787+
788+
/* Re-read the credit on every pass; a negative value keeps the loop going. */
while ((credit = current_credit()) < 0) {
789+
/*
 * last_credit starts at 0 and credit is negative here, so the first
 * pass always takes this branch and initialises last_changed.
 */
if (credit != last_credit) {
790+
last_changed = jiffies;
791+
last_credit = credit;
792+
}
793+
/* The stall timeout is only checked while the balloon state is BP_ECANCELED. */
if (balloon_state == BP_ECANCELED) {
794+
pr_warn_once("Initial ballooning failed, %ld pages need to be freed.\n",
795+
-credit);
796+
/* Unsigned subtraction keeps the elapsed-time check valid across jiffies wrap. */
if (jiffies - last_changed >= HZ * balloon_boot_timeout)
797+
panic("Initial ballooning failed!\n");
798+
}
799+
800+
/* Sleep for HZ / 10 jiffies between polls. */
schedule_timeout_interruptible(HZ / 10);
801+
}
802+
803+
pr_notice("Initial ballooning down finished.\n");
804+
805+
return 0;
806+
}
807+
late_initcall_sync(balloon_wait_finish);

0 commit comments

Comments
 (0)