
Commit e3732fa

Merge branch 'private/edvint/hardclaim' into private/edvint/numaall

2 parents: 51bba91 + ee3c206

File tree: 7 files changed (+139 / -24 lines)


ocaml/xapi-idl/memory/memory.ml

Lines changed: 4 additions & 1 deletion

@@ -185,6 +185,7 @@ type memory_config = {
   ; shadow_mib: int64
   ; required_host_free_mib: int64
   ; overhead_mib: int64
+  ; build_claim_pages: int64
 }

 module Memory_model (D : MEMORY_MODEL_DATA) = struct
@@ -226,14 +227,16 @@ module Memory_model (D : MEMORY_MODEL_DATA) = struct
   let shadow_multiplier_default = 1.0

   let full_config static_max_mib video_mib target_mib vcpus shadow_multiplier =
+    let build_start_mib = build_start_mib static_max_mib target_mib video_mib in
     {
       build_max_mib= build_max_mib static_max_mib video_mib
-    ; build_start_mib= build_start_mib static_max_mib target_mib video_mib
+    ; build_start_mib
     ; xen_max_mib= xen_max_mib static_max_mib
     ; shadow_mib= shadow_mib static_max_mib vcpus shadow_multiplier
     ; required_host_free_mib=
         footprint_mib target_mib static_max_mib vcpus shadow_multiplier
     ; overhead_mib= overhead_mib static_max_mib vcpus shadow_multiplier
+    ; build_claim_pages= pages_of_mib build_start_mib
     }
 end

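The new build_claim_pages field carries build_start_mib as a page count, which domain.ml below uses as the number of pages to claim before building the domain. As a rough sketch of the MiB-to-pages arithmetic (assuming 4 KiB Xen pages, so 1 MiB = 256 pages; these helpers are illustrative, not the xapi-idl definitions):

(* Illustrative only: assumes 4 KiB pages; not the xapi-idl definitions. *)
let kib_per_page = 4L

let kib_per_mib = 1024L

let pages_of_mib mib = Int64.div (Int64.mul mib kib_per_mib) kib_per_page

let () =
  (* 2048 MiB of build-start memory corresponds to 524288 4-KiB pages *)
  Printf.printf "%Ld MiB = %Ld pages\n" 2048L (pages_of_mib 2048L)
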
ocaml/xenopsd/lib/xenops_server.ml

Lines changed: 32 additions & 8 deletions

@@ -155,7 +155,8 @@ type atomic =
   | VM_create_device_model of (Vm.id * bool)
   | VM_destroy_device_model of Vm.id
   | VM_destroy of Vm.id
-  | VM_create of (Vm.id * int64 option * Vm.id option * bool) (*no_sharept*)
+  | VM_create of (Vm.id * (int64 * int64 option) option * Vm.id option * bool)
+      (*no_sharept*)
   | VM_build of (Vm.id * bool)
   | VM_shutdown_domain of (Vm.id * shutdown_request * float)
   | VM_s3suspend of Vm.id
@@ -330,6 +331,7 @@ type vm_receive_op = {
   ; vmr_socket: Unix.file_descr
   ; vmr_handshake: string option  (** handshake protocol *)
   ; vmr_compressed: bool
+  ; vmr_memory_total_source: int64 option [@default None]
 }
 [@@deriving rpcty]

@@ -2317,19 +2319,26 @@ let rec perform_atomic ~progress_callback ?result (op : atomic)
   | VM_destroy id ->
       debug "VM.destroy %s" id ;
       B.VM.destroy t (VM_DB.read_exn id)
-  | VM_create (id, memory_upper_bound, final_id, no_sharept) ->
+  | VM_create (id, memory_upper_bound_and_source, final_id, no_sharept) ->
       let num_of_vbds = List.length (VBD_DB.vbds id) in
       let num_of_vifs = List.length (VIF_DB.vifs id) in
+      let memory_upper_bound = Option.map fst memory_upper_bound_and_source
+      and memory_total_source =
+        Option.map snd memory_upper_bound_and_source |> Option.join
+      in
       debug
-        "VM.create %s memory_upper_bound = %s, num_of_vbds = %d, num_of_vifs = \
-         %d"
+        "VM.create %s memory_upper_bound = %s, memory_total_source = %s, \
+         num_of_vbds = %d, num_of_vifs = %d"
         id
        (Option.value ~default:"None"
           (Option.map Int64.to_string memory_upper_bound)
        )
+       (Option.value ~default:"None"
+          (Option.map Int64.to_string memory_total_source)
+       )
        num_of_vbds num_of_vifs ;
-      B.VM.create t memory_upper_bound (VM_DB.read_exn id) final_id no_sharept
-        num_of_vbds num_of_vifs
+      B.VM.create t memory_upper_bound memory_total_source (VM_DB.read_exn id)
+        final_id no_sharept num_of_vbds num_of_vifs
   | VM_build (id, force) ->
       debug "VM.build %s" id ;
       let vbds : Vbd.t list = VBD_DB.vbds id |> vbd_plug_order in
@@ -2897,7 +2906,10 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
         Request.write (fun _ -> ()) request fd
       in
       do_request vm_fd
-        [("memory_limit", Int64.to_string state.Vm.memory_limit)]
+        [
+          ("memory_limit", Int64.to_string state.Vm.memory_limit)
+        ; ("memory_total_source", Int64.to_string state.Vm.memory_actual)
+        ]
         url ;
       let first_handshake () =
         ( match Handshake.recv vm_fd with
@@ -3004,6 +3016,7 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
           vmr_id= id
         ; vmr_final_id= final_id
         ; vmr_memory_limit= memory_limit
+        ; vmr_memory_total_source= memory_total_source
         ; vmr_socket= s
         ; vmr_handshake= handshake
         ; vmr_compressed
@@ -3084,7 +3097,14 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
         )
       in
       perform_atomics
-        ([VM_create (id, Some memory_limit, Some final_id, no_sharept)]
+        ([
+           VM_create
+             ( id
+             , Some (memory_limit, memory_total_source)
+             , Some final_id
+             , no_sharept
+             )
+         ]
         (* Perform as many operations as possible on the destination
            domain before pausing the original domain *)
        @ atomics_of_operation (VM_restore_vifs id)
@@ -3902,6 +3922,9 @@ module VM = struct
   let module Response = Cohttp.Response.Make (Cohttp_posix_io.Unbuffered_IO) in
   let dbg = List.assoc "dbg" cookies in
   let memory_limit = List.assoc "memory_limit" cookies |> Int64.of_string in
+  let memory_total_source =
+    List.assoc_opt "memory_total_source" cookies |> Option.map Int64.of_string
+  in
   let handshake = List.assoc_opt cookie_mem_migration cookies in
   let compressed_memory = get_compression cookies in
   Debug.with_thread_associated dbg
@@ -3932,6 +3955,7 @@ module VM = struct
         ; vmr_socket= transferred_fd
         ; vmr_handshake= handshake
         ; vmr_compressed= compressed_memory
+        ; vmr_memory_total_source= memory_total_source
         }
       in
       let task =

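The VM_create payload grows from int64 option to (int64 * int64 option) option, and perform_atomic splits it back into the two separate options that B.VM.create now takes. A self-contained sketch of that unpacking (names are illustrative, outside xenopsd):

(* Standalone illustration of how the (int64 * int64 option) option payload
   decomposes into the two options the backend call expects. *)
let split (payload : (int64 * int64 option) option) =
  let memory_upper_bound = Option.map fst payload
  and memory_total_source = Option.map snd payload |> Option.join in
  (memory_upper_bound, memory_total_source)

let () =
  assert (split None = (None, None)) ;
  assert (split (Some (1024L, None)) = (Some 1024L, None)) ;
  assert (split (Some (1024L, Some 512L)) = (Some 1024L, Some 512L))
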
ocaml/xenopsd/lib/xenops_server_plugin.ml

Lines changed: 1 addition & 0 deletions

@@ -81,6 +81,7 @@ module type S = sig
   val create :
        Xenops_task.task_handle
     -> int64 option
+    -> int64 option
     -> Vm.t
     -> Vm.id option
     -> bool (* no_sharept*)

ocaml/xenopsd/lib/xenops_server_simulator.ml

Lines changed: 1 addition & 1 deletion

@@ -566,7 +566,7 @@ module VM = struct

   let remove _vm = ()

-  let create _ memory_limit vm _ _ _ _ =
+  let create _ memory_limit _ vm _ _ _ _ =
     with_lock m (create_nolock memory_limit vm)

   let destroy _ vm = with_lock m (destroy_nolock vm)

ocaml/xenopsd/xc/domain.ml

Lines changed: 58 additions & 7 deletions

@@ -152,6 +152,8 @@ type builder_spec_info =
 type build_info = {
     memory_max: int64  (** memory max in kilobytes *)
   ; memory_target: int64  (** memory target in kilobytes *)
+  ; memory_total_source: int64 option
+        (** amount of memory to claim (during migration) *)
   ; kernel: string  (** in hvm case, point to hvmloader *)
   ; vcpus: int  (** vcpus max *)
   ; priv: builder_spec_info
@@ -246,7 +248,10 @@ let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib
     in
     (* At exponentially increasing intervals, write *)
     (* a debug message saying how long we've waited: *)
-    if is_power_of_2 accumulated_wait_time_seconds then
+    if
+      accumulated_wait_time_seconds = 0
+      || is_power_of_2 accumulated_wait_time_seconds
+    then
       debug
         "Waited %i second(s) for memory to become available: %Ld KiB free, %Ld \
          KiB scrub, %Ld KiB required"
@@ -1057,7 +1062,7 @@ let numa_placement domid ~vcpus ~cores ~memory affinity =
          __FUNCTION__ domid ;
        None
   in
-  let nr_pages = Int64.div memory 4096L |> Int64.to_int in
+  let nr_pages = Memory.pages_of_bytes_used memory |> Int64.to_int in
   try
     D.debug "NUMAClaim domid %d: local claim on node %d: %d pages" domid
       node nr_pages ;
@@ -1071,7 +1076,7 @@ let numa_placement domid ~vcpus ~cores ~memory affinity =
       D.debug "NUMAClaim domid %d: local claim not available" domid ;
       set_vcpu_affinity cpu_affinity ;
       None
-  | Xenctrlext.Unix_error (errno, _) ->
+  | Xenctrlext.Unix_error ((Unix.ENOMEM as errno), _) ->
       D.info
         "%s: unable to claim enough memory, domain %d won't be hosted in a \
          single NUMA node. (error %s)"
@@ -1083,6 +1088,13 @@ let numa_placement domid ~vcpus ~cores ~memory affinity =
 let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid =
   let open Memory in
   let uuid = get_uuid ~xc domid in
+  debug
+    "VM = %s, build_max_mib = %Ld, build_start_mib = %Ld, xen_max_mib =\n\
+    \ %Ld, shadow_mib = %Ld, required_host_free_mib = %Ld, overhead_mib = \
+     %Ld"
+    (Uuidx.to_string uuid) memory.build_max_mib memory.build_start_mib
+    memory.xen_max_mib memory.shadow_mib memory.required_host_free_mib
+    memory.overhead_mib ;
   debug "VM = %s; domid = %d; waiting for %Ld MiB of free host memory"
     (Uuidx.to_string uuid) domid memory.required_host_free_mib ;
   (* CA-39743: Wait, if necessary, for the Xen scrubber to catch up. *)
@@ -1170,10 +1182,38 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid =
           and cores =
             Xenops_server.cores_of_numa_affinity_policy pin ~vcpus
           in
-          numa_placement domid ~vcpus ~cores
-            ~memory:(Int64.mul memory.xen_max_mib 1048576L)
-            affinity
-          |> Option.map fst
+
+          let build_claim_bytes =
+            Memory.bytes_of_pages memory.build_claim_pages
+          in
+          D.debug "VM = %s; domid = %d; will claim %Ld bytes = %Ld pages"
+            (Uuidx.to_string uuid) domid build_claim_bytes
+            memory.build_claim_pages ;
+          let memory = build_claim_bytes in
+          match numa_placement domid ~vcpus ~cores ~memory affinity with
+          | None ->
+              (* Always perform a global claim when NUMA placement is
+                 enabled, and single node claims failed or were
+                 unavailable:
+                 This tries to ensure that memory allocated for this
+                 domain won't use up memory claimed by other domains.
+                 If claims are mixed with non-claims then Xen can't
+                 currently guarantee that it would honour the existing
+                 claims.
+                 A failure here is a hard failure: we'd fail allocating
+                 memory later anyway.
+              *)
+              let nr_pages =
+                Memory.pages_of_bytes_used memory |> Int64.to_int
+              in
+              let xcext = Xenctrlext.get_handle () in
+              D.debug "NUMAClaim domid %d: global claim: %d pages" domid
+                nr_pages ;
+              Xenctrlext.domain_claim_pages xcext domid
+                ~numa_node:Xenctrlext.NumaNode.none nr_pages ;
+              None
+          | Some (plan, _) ->
+              Some plan
         )
       in
       let store_chan, console_chan = create_channels ~xc uuid domid in
@@ -1865,6 +1905,17 @@ let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~timeoffset ~extras
         maybe_ca_140252_workaround ~xc ~vcpus domid ;
         (memory, vm_stuff, `pvh)
   in
+  let memory =
+    match info.memory_total_source with
+    | None ->
+        memory
+    | Some kib ->
+        let build_claim_pages = Memory.pages_of_kib_used kib in
+        let bytes = Memory.bytes_of_kib kib in
+        debug "Domid %d: memory_total_source = %Ld bytes = %Ld KiB = %Ld pages"
+          domid bytes kib build_claim_pages ;
+        Memory.{memory with build_claim_pages}
+  in
   let store_port, console_port, numa_placements =
     build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity:info.hard_affinity domid
   in

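build_pre now claims the build memory up front: a node-local claim via numa_placement where possible, otherwise a host-wide claim so that later allocations cannot eat into pages already claimed by other domains. A stripped-down sketch of that fallback shape (the claim functions below are placeholders, not the Xenctrlext bindings):

(* Placeholders standing in for the Xenctrlext claim calls; not the real bindings. *)
let claim_on_node ~node:_ ~nr_pages:_ = false (* pretend the local claim fails *)

let claim_globally ~nr_pages = Printf.printf "global claim: %d pages\n" nr_pages

(* Shape of the new logic: prefer a node-local claim, otherwise fall back to a
   host-wide claim for the same number of pages. *)
let claim_build_memory ?node ~nr_pages () =
  let claimed_locally =
    match node with
    | Some node -> claim_on_node ~node ~nr_pages
    | None -> false
  in
  if not claimed_locally then claim_globally ~nr_pages

let () = claim_build_memory ~node:0 ~nr_pages:524288 ()
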
ocaml/xenopsd/xc/domain.mli

Lines changed: 2 additions & 0 deletions

@@ -130,6 +130,8 @@ val builder_spec_info : builder_spec_info Rpc.Types.def
 type build_info = {
     memory_max: int64  (** memory max in kilobytes *)
   ; memory_target: int64  (** memory target in kilobytes *)
+  ; memory_total_source: int64 option
+        (** memory used on source during migration/resume in kilobytes *)
   ; kernel: string  (** image to load. In HVM case, point to hvmloader *)
   ; vcpus: int  (** vcpus max *)
   ; priv: builder_spec_info

ocaml/xenopsd/xc/xenops_server_xen.ml

Lines changed: 41 additions & 7 deletions

@@ -1637,6 +1637,7 @@ module VM = struct
         {
           Domain.memory_max= vm.memory_static_max /// 1024L
         ; memory_target= vm.memory_dynamic_min /// 1024L
+        ; memory_total_source= None
         ; kernel= ""
         ; vcpus= vm.vcpu_max
         ; priv= builder_spec_info
@@ -1747,8 +1748,8 @@ module VM = struct
     in
     (device_id, revision)

-  let create_exn task memory_upper_bound vm final_id no_sharept num_of_vbds
-      num_of_vifs =
+  let create_exn task memory_upper_bound memory_total_source vm final_id
+      no_sharept num_of_vbds num_of_vifs =
     let k = vm.Vm.id in
     with_xc_and_xs (fun xc xs ->
         (* Ensure the DB contains something for this VM - this is to avoid a
@@ -1821,33 +1822,40 @@ module VM = struct
              needed. If we are live migrating then we will only know an
              upper bound. If we are starting from scratch then we have a
              free choice. *)
-          let min_bytes, max_bytes =
+          let min_bytes, max_bytes, memory_total_source_bytes =
            match memory_upper_bound with
            | Some x ->
                debug "VM = %s; using memory_upper_bound = %Ld" vm.Vm.id x ;
-                (x, x)
+                (x, x, memory_total_source)
            | None ->
                if resuming then (
                  debug "VM = %s; using stored suspend_memory_bytes = %Ld"
                    vm.Vm.id persistent.VmExtra.suspend_memory_bytes ;
                  ( persistent.VmExtra.suspend_memory_bytes
                  , persistent.VmExtra.suspend_memory_bytes
+                 , Some persistent.VmExtra.suspend_memory_bytes
                  )
                ) else (
                  debug
                    "VM = %s; using memory_dynamic_min = %Ld and \
                     memory_dynamic_max = %Ld"
                    vm.Vm.id vm.memory_dynamic_min vm.memory_dynamic_max ;
-                  (vm.memory_dynamic_min, vm.memory_dynamic_max)
+                  (vm.memory_dynamic_min, vm.memory_dynamic_max, None)
                )
          in
          let min_kib = kib_of_bytes_used (min_bytes +++ overhead_bytes)
+         and memory_total_source_kib =
+           Option.map kib_of_bytes_used memory_total_source_bytes
          and max_kib = kib_of_bytes_used (max_bytes +++ overhead_bytes) in
          (* XXX: we would like to be able to cancel an in-progress
             with_reservation *)
          let dbg = Xenops_task.get_dbg task in
          Mem.with_reservation dbg min_kib max_kib
            (fun target_plus_overhead_kib reservation_id ->
+             debug
+               "VM = %s, memory [%Ld KiB, %Ld KiB], \
+                target_plus_overhead=%Ld KiB"
+               vm.Vm.id min_kib max_kib target_plus_overhead_kib ;
              let domain_config, persistent =
                match persistent.VmExtra.domain_config with
                | Some dc ->
@@ -1888,8 +1896,26 @@ module VM = struct
                let target_bytes =
                  target_plus_overhead_bytes --- overhead_bytes
                in
+               debug
+                 "VM = %s, memory target_bytes = %Ld, dynamic max = %Ld"
+                 vm.Vm.id target_bytes vm.memory_dynamic_max ;
                min vm.memory_dynamic_max target_bytes
              in
+             let persistent =
+               match persistent with
+               | {VmExtra.build_info= Some x; _} as t ->
+                   {
+                     t with
+                     build_info=
+                       Some
+                         {
+                           x with
+                           memory_total_source= memory_total_source_kib
+                         }
+                   }
+               | _ ->
+                   persistent
+             in
              set_initial_target ~xs domid (Int64.div initial_target 1024L) ;
              (* Log uses of obsolete option *)
              if vm.suppress_spurious_page_faults then
@@ -2367,6 +2393,7 @@ module VM = struct
         {
           Domain.memory_max= static_max_kib
         ; memory_target= initial_target
+        ; memory_total_source= None
         ; kernel
         ; vcpus= vm.vcpu_max
         ; priv
@@ -3005,6 +3032,7 @@ module VM = struct
           | _ ->
               ""
         in
+        debug "VM = %s, initial_target = %Ld" vm.Vm.id initial_target ;
        ({x with Domain.memory_target= initial_target}, timeoffset)
      in
      let vtpm = vtpm_of ~vm in
@@ -3144,7 +3172,10 @@ module VM = struct
     let memory_actual =
       let pages = Int64.of_nativeint di.Xenctrl.total_memory_pages in
       let kib = Xenctrl.pages_to_kib pages in
-      Memory.bytes_of_kib kib
+      let bytes = Memory.bytes_of_kib kib in
+      D.debug "VM %s memory actual: %Ld pages = %Ld KiB = %Ld bytes"
+        (Uuidm.to_string uuid) pages kib bytes ;
+      bytes
     in
     let memory_limit =
       (* The maximum amount of memory the domain can consume is the max
@@ -3167,7 +3198,10 @@ module VM = struct
       in
       (* CA-31764: may be larger than static_max if maxmem has been
          increased to initial-reservation. *)
-      max memory_actual max_memory_bytes
+      let result = max memory_actual max_memory_bytes in
+      D.debug "VM %s memory limit = %Ld bytes" (Uuidm.to_string uuid)
+        result ;
+      result
     in
     let rtc =
       try

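The extra debug lines spell out the unit chain behind memory_actual: Xen reports total_memory_pages, which is converted to KiB and then to bytes. Assuming 4 KiB pages, the arithmetic is simply (illustrative helpers, not Xenctrl.pages_to_kib or Memory.bytes_of_kib themselves):

(* Illustrative only: assumes 4 KiB pages, as on x86 Xen. *)
let kib_of_pages pages = Int64.mul pages 4L

let bytes_of_kib kib = Int64.mul kib 1024L

let () =
  let pages = 262144L in
  let kib = kib_of_pages pages in
  let bytes = bytes_of_kib kib in
  (* 262144 pages = 1048576 KiB = 1073741824 bytes (1 GiB) *)
  Printf.printf "%Ld pages = %Ld KiB = %Ld bytes\n" pages kib bytes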