Skip to content

Commit 994a8dc

Browse files
authored
CA-421914: preserve Host.numa_affinity_policy across pool join (#6796)
When `numa-placement` was true (the default in XS9), a host's `numa-affinity-policy` setting of `any` was lost during pool join. This is because `create_params` is maintained by hand and needs to be extended every time we add a new field to the datamodel.

Update `create_params` to propagate `numa_affinity_policy` on pool join.

Before pool join:
```
[root@genuk-21-09d ~]# xe host-list params=all|grep numa
numa-affinity-policy ( RW): any
```

After pool join:
```
[root@genuk-21-09d ~]# xe host-list params=all|grep numa
numa-affinity-policy ( RW): any
numa-affinity-policy ( RW): any
```
(Prior to this bugfix, the newly joined host would revert to `default_policy`.)

The correct policy is also set in xenopsd:
```
2025-12-15T15:12:09.029478+00:00 genuk-21-09d xenopsd-xc: [ info||134 |host.set_numa_affinity_policy R:99aecf4c494f|xenops_server] Enforcing 'any' NUMA affinity policy
```
2 parents 654cef3 + 2e46250 commit 994a8dc

File tree

7 files changed

+31
-23
lines changed

7 files changed

+31
-23
lines changed

ocaml/idl/datamodel_host.ml

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,6 +1209,23 @@ let license_remove =
12091209
to the unlicensed edition"
12101210
~allowed_roles:_R_POOL_OP ()
12111211

1212+
let host_numa_affinity_policy =
1213+
Enum
1214+
( "host_numa_affinity_policy"
1215+
, [
1216+
("any", "VMs are spread across all available NUMA nodes")
1217+
; ( "best_effort"
1218+
, "VMs are placed on the smallest number of NUMA nodes that they fit \
1219+
using soft-pinning, but the policy doesn't guarantee a balanced \
1220+
placement, falling back to the 'any' policy."
1221+
)
1222+
; ( "default_policy"
1223+
, "Use the NUMA affinity policy that is the default for the current \
1224+
version"
1225+
)
1226+
]
1227+
)
1228+
12121229
let create_params =
12131230
[
12141231
{
@@ -1406,6 +1423,13 @@ let create_params =
14061423
; param_release= numbered_release "25.38.0-next"
14071424
; param_default= Some (VBool false)
14081425
}
1426+
; {
1427+
param_type= host_numa_affinity_policy
1428+
; param_name= "numa_affinity_policy"
1429+
; param_doc= "NUMA-aware VM memory and vCPU placement policy"
1430+
; param_release= numbered_release "25.39.0-next"
1431+
; param_default= Some (VEnum "default_policy")
1432+
}
14091433
]
14101434

14111435
let create =
@@ -2311,23 +2335,6 @@ let cleanup_pool_secret =
23112335
]
23122336
~allowed_roles:_R_LOCAL_ROOT_ONLY ~hide_from_docs:true ()
23132337

2314-
let host_numa_affinity_policy =
2315-
Enum
2316-
( "host_numa_affinity_policy"
2317-
, [
2318-
("any", "VMs are spread across all available NUMA nodes")
2319-
; ( "best_effort"
2320-
, "VMs are placed on the smallest number of NUMA nodes that they fit \
2321-
using soft-pinning, but the policy doesn't guarantee a balanced \
2322-
placement, falling back to the 'any' policy."
2323-
)
2324-
; ( "default_policy"
2325-
, "Use the NUMA affinity policy that is the default for the current \
2326-
version"
2327-
)
2328-
]
2329-
)
2330-
23312338
let set_numa_affinity_policy =
23322339
call ~name:"set_numa_affinity_policy" ~lifecycle:[]
23332340
~doc:"Set VM placement NUMA affinity policy"

ocaml/tests/common/test_common.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host")
184184
~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry
185185
~console_idle_timeout ~ssh_auto_mode ~secure_boot
186186
~software_version:(Xapi_globs.software_version ())
187-
~https_only
187+
~https_only ~numa_affinity_policy:`default_policy
188188
in
189189
Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ;
190190
host

ocaml/tests/test_host.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ let add_host __context name =
2727
~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Clock.Date.epoch
2828
~console_idle_timeout:0L ~ssh_auto_mode:false ~secure_boot:false
2929
~software_version:(Xapi_globs.software_version ())
30-
~https_only:false
30+
~https_only:false ~numa_affinity_policy:`default_policy
3131
)
3232

3333
(* Creates an unlicensed pool with the maximum number of hosts *)

ocaml/xapi/dbsync_slave.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ let create_localhost ~__context info =
6666
~console_idle_timeout:Constants.default_console_idle_timeout
6767
~ssh_auto_mode:!Xapi_globs.ssh_auto_mode_default
6868
~secure_boot:false ~software_version:[]
69-
~https_only:!Xapi_globs.https_only
69+
~https_only:!Xapi_globs.https_only ~numa_affinity_policy:`default_policy
7070
in
7171
()
7272

ocaml/xapi/xapi_host.ml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,7 +1029,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address
10291029
~license_params ~edition ~license_server ~local_cache_sr ~chipset_info
10301030
~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled
10311031
~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode
1032-
~secure_boot ~software_version ~https_only =
1032+
~secure_boot ~software_version ~https_only ~numa_affinity_policy =
10331033
(* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent
10341034
pool-join might succeed. Note: we do it in this order to avoid a problem checking restrictions during
10351035
the initial setup of the database *)
@@ -1073,8 +1073,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address
10731073
~name_label ~uuid ~other_config:[] ~capabilities:[]
10741074
~cpu_configuration:[] (* !!! FIXME hard coding *)
10751075
~cpu_info:[] ~chipset_info ~memory_overhead:0L
1076-
~sched_policy:"credit" (* !!! FIXME hard coding *)
1077-
~numa_affinity_policy:`default_policy
1076+
~sched_policy:"credit" (* !!! FIXME hard coding *) ~numa_affinity_policy
10781077
~supported_bootloaders:(List.map fst Xapi_globs.supported_bootloaders)
10791078
~suspend_image_sr:Ref.null ~crash_dump_sr:Ref.null ~logging:[] ~hostname
10801079
~address ~metrics ~license_params ~boot_free_mem:0L ~ha_statefiles:[]

ocaml/xapi/xapi_host.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ val create :
139139
-> secure_boot:bool
140140
-> software_version:(string * string) list
141141
-> https_only:bool
142+
-> numa_affinity_policy:API.host_numa_affinity_policy
142143
-> [`host] Ref.t
143144

144145
val destroy : __context:Context.t -> self:API.ref_host -> unit

ocaml/xapi/xapi_pool.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) :
10621062
~secure_boot:host.API.host_secure_boot
10631063
~software_version:host.API.host_software_version
10641064
~https_only:host.API.host_https_only
1065+
~numa_affinity_policy:host.API.host_numa_affinity_policy
10651066
in
10661067
(* Copy other-config into newly created host record: *)
10671068
no_exn

0 commit comments

Comments
 (0)