Skip to content

Commit 6a2e5ac

Browse files
authored
CA-423204: use xen field meminfo.claimed to calculate available memory in node for new VM (#6867)
The memory in a NUMA node that can still be claimed for a new VM needs to be calculated as (meminfo.memfree - meminfo.claimed). Using only meminfo.memfree does not account for VMs that have already claimed memory in the node but have not yet finished allocating the claimed amount. A new debug line emitted during the calculation makes it easier to understand the different node memory values being considered and calculated for the VMs being created, e.g.:
```
2026-01-27T00:03:44.788754+00:00 orca xenopsd-xc: [debug||22 |Async.VM.start_on R:4533f11cb6bf|xenops] mem_claimable_for_new_vm: NUMA nodeid=0, domid=69: memfree=281696272384 memsize=825707462656 claimed=249459507200: available=32236765184
2026-01-27T00:03:44.788770+00:00 orca xenopsd-xc: [debug||22 |Async.VM.start_on R:4533f11cb6bf|xenops] mem_claimable_for_new_vm: NUMA nodeid=1, domid=69: memfree=336155111424 memsize=824633720832 claimed=303146598400: available=33008513024
...
```
Tested with:
* SR-233407 (xs9 NUMA functional regression test)
* job 4541679, where this change calculates the correct nodes to start the VMs on a host with 8 nodes and high contention from VMs starting simultaneously.
2 parents e83992f + 93a0546 commit 6a2e5ac

File tree

5 files changed

+143
-15
lines changed

5 files changed

+143
-15
lines changed

ocaml/libs/xenctrl-ext/xenctrlext.ml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,18 @@ exception Not_available
126126
let domain_claim_pages handle domid ?(numa_node = NumaNode.none) nr_pages =
127127
stub_domain_claim_pages handle domid numa_node nr_pages
128128

129+
module HostNuma = struct
  (* Numa state of a host *)

  (** Memory counters of a single NUMA node.

      The record is built positionally by the C stub
      [stub_xenctrlext_numa_meminfo] (field 0 = [size], 1 = [free],
      2 = [claimed]), so the order of the fields below must not change. *)
  type node_meminfo = {
      size: int64
    ; free: int64
    ; claimed: int64
  }

  (** [numa_get_meminfo handle] returns one [node_meminfo] per NUMA node
      reported by the hypervisor. *)
  external numa_get_meminfo : handle -> node_meminfo array
    = "stub_xenctrlext_numa_meminfo"
end
137+
129138
let get_nr_nodes handle =
130-
let info = numainfo handle in
131-
Array.length info.memory
139+
let meminfo = HostNuma.numa_get_meminfo handle in
140+
Array.length meminfo
132141

133142
module DomainNuma = struct
134143
(* Numa state of a domain *)

ocaml/libs/xenctrl-ext/xenctrlext.mli

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,9 @@ module DomainNuma : sig
121121

122122
val state : handle -> domid:int -> t
123123
end
124+
125+
module HostNuma : sig
  (** Memory counters of a single NUMA node of the host. [claimed] is
      reported as [0L] when the stub has to fall back to the older
      [xc_numainfo] call, which does not expose claims. *)
  type node_meminfo = {
      size: int64
    ; free: int64
    ; claimed: int64
  }

  val numa_get_meminfo : handle -> node_meminfo array
  (** [numa_get_meminfo handle] queries the hypervisor and returns one
      record per NUMA node. Hypercall failures are reported by the C stub
      through [failwith_xc]. *)
end

ocaml/libs/xenctrl-ext/xenctrlext_stubs.c

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,104 @@ CAMLprim value stub_xc_domain_numa_get_node_pages_wrapper(value xch_val, value d
715715
#endif
716716
}
717717

718+
/*
719+
* Get NUMA memory info with claimed memory support
720+
*
721+
* Falls back to previous xc_numainfo with claimed=0
722+
* if XEN_SYSCTL_numa_meminfo is not available at compile time or runtime
723+
*/
724+
CAMLprim value stub_xenctrlext_numa_meminfo(value xch_val)
725+
{
726+
CAMLparam1(xch_val);
727+
CAMLlocal2(result, info);
728+
xc_interface *xch = xch_of_val(xch_val);
729+
unsigned int max_nodes = 0;
730+
unsigned int i;
731+
int ret;
732+
733+
#ifdef XEN_SYSCTL_numa_meminfo
734+
xen_sysctl_node_meminfo_t *meminfo = NULL;
735+
736+
/* First call to get node count */
737+
caml_release_runtime_system();
738+
ret = xc_numa_meminfo(xch, &max_nodes, NULL);
739+
caml_acquire_runtime_system();
740+
741+
if (ret == 0) {
742+
/* New hypercall available, use it */
743+
meminfo = calloc(max_nodes, sizeof(*meminfo));
744+
if (!meminfo)
745+
caml_raise_out_of_memory();
746+
747+
caml_release_runtime_system();
748+
ret = xc_numa_meminfo(xch, &max_nodes, meminfo);
749+
caml_acquire_runtime_system();
750+
751+
if (ret < 0) {
752+
int err = errno;
753+
free(meminfo);
754+
errno = err;
755+
failwith_xc(xch);
756+
}
757+
758+
result = caml_alloc_tuple(max_nodes);
759+
for (i = 0; i < max_nodes; i++) {
760+
info = caml_alloc_tuple(3);
761+
Store_field(info, 0, caml_copy_int64(meminfo[i].size));
762+
Store_field(info, 1, caml_copy_int64(meminfo[i].free));
763+
Store_field(info, 2, caml_copy_int64(meminfo[i].claimed));
764+
Store_field(result, i, info);
765+
}
766+
767+
free(meminfo);
768+
CAMLreturn(result);
769+
}
770+
771+
/* If we get ENOSYS or EOPNOTSUPP, fall back to old hypercall */
772+
if (errno != ENOSYS && errno != EOPNOTSUPP)
773+
failwith_xc(xch);
774+
#endif
775+
776+
/* Fallback: use xc_numainfo with claimed=0 */
777+
{
778+
xc_meminfo_t *old_meminfo = NULL;
779+
780+
caml_release_runtime_system();
781+
ret = xc_numainfo(xch, &max_nodes, NULL, NULL);
782+
caml_acquire_runtime_system();
783+
784+
if (ret < 0)
785+
failwith_xc(xch);
786+
787+
old_meminfo = calloc(max_nodes, sizeof(*old_meminfo));
788+
if (!old_meminfo)
789+
caml_raise_out_of_memory();
790+
791+
caml_release_runtime_system();
792+
ret = xc_numainfo(xch, &max_nodes, old_meminfo, NULL);
793+
caml_acquire_runtime_system();
794+
795+
if (ret < 0) {
796+
int err = errno;
797+
free(old_meminfo);
798+
errno = err;
799+
failwith_xc(xch);
800+
}
801+
802+
result = caml_alloc_tuple(max_nodes);
803+
for (i = 0; i < max_nodes; i++) {
804+
info = caml_alloc_tuple(3);
805+
Store_field(info, 0, caml_copy_int64(old_meminfo[i].memsize));
806+
Store_field(info, 1, caml_copy_int64(old_meminfo[i].memfree));
807+
Store_field(info, 2, caml_copy_int64(0)); /* claimed=0 */
808+
Store_field(result, i, info);
809+
}
810+
811+
free(old_meminfo);
812+
CAMLreturn(result);
813+
}
814+
}
815+
718816

719817
/*
720818
* Local variables:

ocaml/xenopsd/xc/domain.ml

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -995,16 +995,26 @@ let numa_hierarchy =
995995

996996
let numa_mutex = Mutex.create ()
997997

998+
(** [node_mem_claimable_for_new_vm ~node ~domid m] computes how much of the
    memory of NUMA node [node] can still be claimed for a new VM, i.e. the
    free memory minus the memory already claimed on that node, and logs all
    the values involved at debug level. [domid] is only used in the log
    line. *)
let node_mem_claimable_for_new_vm ~node ~domid m =
  let {Xenctrlext.HostNuma.size; free; claimed} = m in
  (* NOTE(review): negative if [claimed] ever exceeds [free]; confirm that
     callers tolerate a negative amount. *)
  let available = Int64.sub free claimed in
  let nodeid = Fmt.str "%a" Topology.NUMA.pp_dump_node node in
  D.debug
    "mem_claimable_for_new_vm: NUMA nodeid=%s, domid=%d: memfree=%Ld \
     memsize=%Ld claimed=%Ld: available=%Ld"
    nodeid domid free size claimed available ;
  available
1007+
9981008
let numa_init () =
9991009
let xcext = Xenctrlext.get_handle () in
10001010
let host = Lazy.force numa_hierarchy in
1001-
let mem = (Xenctrlext.numainfo xcext).memory in
1011+
let mem = Xenctrlext.HostNuma.numa_get_meminfo xcext in
10021012
D.debug "Host NUMA information: %s"
10031013
(Fmt.to_to_string (Fmt.Dump.option Topology.NUMA.pp_dump) host) ;
10041014
Array.iteri
10051015
(fun i m ->
1006-
let open Xenctrlext in
1007-
D.debug "NUMA node %d: %Ld/%Ld memory free" i m.memfree m.memsize
1016+
let open Xenctrlext.HostNuma in
1017+
D.debug "NUMA node %d: %Ld/%Ld/%Ld memory free" i m.free m.size m.claimed
10081018
)
10091019
mem
10101020

@@ -1021,10 +1031,13 @@ let numa_placement domid ~vcpus ~cores ~memory affinity =
10211031
let ( let* ) = Option.bind in
10221032
let xcext = get_handle () in
10231033
let* host = Lazy.force numa_hierarchy in
1024-
let numa_meminfo = (numainfo xcext).memory |> Array.to_seq in
1034+
let numa_meminfo = HostNuma.numa_get_meminfo xcext |> Array.to_seq in
10251035
let nodes =
10261036
Seq.map2
1027-
(fun node m -> NUMA.resource host node ~memory:m.memfree)
1037+
(fun node m ->
1038+
NUMA.resource host node
1039+
~memory:(node_mem_claimable_for_new_vm ~node ~domid m)
1040+
)
10281041
(NUMA.nodes host) numa_meminfo
10291042
in
10301043
let vm = NUMARequest.make ~memory ~vcpus ~cores in

ocaml/xenopsd/xc/numa.ml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,17 @@ let human_readable_bytes quantity =
4949
else
5050
loop [] quantity binary_prefixes |> String.concat ", "
5151

52-
let get_memory () =
53-
let {memory; _} = numainfo xc in
54-
memory
52+
let get_memory () = HostNuma.numa_get_meminfo xc
5553

5654
let print_mem c mem =
5755
for i = 0 to Array.length mem - 1 do
58-
let {memfree; memsize} = mem.(i) in
59-
let memfree = human_readable_bytes memfree in
60-
let memsize = human_readable_bytes memsize in
56+
let {HostNuma.size; free; claimed} = mem.(i) in
57+
let memfree = human_readable_bytes free in
58+
let memsize = human_readable_bytes size in
59+
let memclaimed = human_readable_bytes claimed in
6160
Logs.app (fun m ->
62-
m "\t%d: %s free out of %s" i memfree memsize ~tags:(stamp c)
61+
m "\t%d: %s free / %s claimed out of %s" i memfree memclaimed memsize
62+
~tags:(stamp c)
6363
)
6464
done
6565

@@ -72,7 +72,9 @@ let print_diff_mem before after =
7272
let diff c old cur =
7373
let changed_yet = ref false in
7474
for i = 0 to Int.min (Array.length old) (Array.length cur) - 1 do
75-
let {memfree= a_free; _}, {memfree= b_free; _} = (old.(i), cur.(i)) in
75+
let {HostNuma.free= a_free; _}, {HostNuma.free= b_free; _} =
76+
(old.(i), cur.(i))
77+
in
7678
if a_free <> b_free then (
7779
if not !changed_yet then changed_yet := true ;
7880
let free = human_readable_bytes b_free in

0 commit comments

Comments
 (0)