diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 9daab6113e..9556096fe4 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -1304,9 +1304,9 @@ let allowed_VBD_devices_HVM_floppy = (fun x -> Device_number.(make Floppy ~disk:x ~partition:0)) (inclusive_range 0 1) -let allowed_VIF_devices_HVM = vif_inclusive_range 0 6 +let allowed_VIF_devices_HVM = vif_inclusive_range 0 15 -let allowed_VIF_devices_PV = vif_inclusive_range 0 6 +let allowed_VIF_devices_PV = vif_inclusive_range 0 15 (** [possible_VBD_devices_of_string s] returns a list of Device_number.t which represent possible interpretations of [s]. *) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 36a2ea92fe..b47344a30e 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -848,10 +848,11 @@ module Queues = struct let get tag qs = with_lock qs.m (fun () -> - if StringMap.mem tag qs.qs then - StringMap.find tag qs.qs - else - Queue.create () + match StringMap.find_opt tag qs.qs with + | Some x -> + x + | None -> + Queue.create () ) let tags qs = @@ -862,10 +863,11 @@ module Queues = struct let push_with_coalesce should_keep tag item qs = with_lock qs.m (fun () -> let q = - if StringMap.mem tag qs.qs then - StringMap.find tag qs.qs - else - Queue.create () + match StringMap.find_opt tag qs.qs with + | Some x -> + x + | None -> + Queue.create () in push_with_coalesce should_keep item q ; qs.qs <- StringMap.add tag q qs.qs ; @@ -2297,11 +2299,18 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) debug "VM.destroy %s" id ; B.VM.destroy t (VM_DB.read_exn id) | VM_create (id, memory_upper_bound, final_id, no_sharept) -> - debug "VM.create %s memory_upper_bound = %s" id + let num_of_vbds = List.length (VBD_DB.vbds id) in + let num_of_vifs = List.length (VIF_DB.vifs id) in + debug + "VM.create %s memory_upper_bound = %s, num_of_vbds = %d, num_of_vifs = \ + %d" + id (Option.value ~default:"None" (Option.map Int64.to_string memory_upper_bound) - ) ; + ) + num_of_vbds num_of_vifs ; B.VM.create t memory_upper_bound (VM_DB.read_exn id) final_id no_sharept + num_of_vbds num_of_vifs | VM_build (id, force) -> debug "VM.build %s" id ; let vbds : Vbd.t list = VBD_DB.vbds id |> vbd_plug_order in diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index 19ab155aa9..e4a61bb9ac 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -84,6 +84,8 @@ module type S = sig -> Vm.t -> Vm.id option -> bool (* no_sharept*) + -> int (* num_of_vbds *) + -> int (* num_of_vifs *) -> unit val build : diff --git a/ocaml/xenopsd/lib/xenops_server_simulator.ml b/ocaml/xenopsd/lib/xenops_server_simulator.ml index 13ae583c7d..0c6ac3f606 100644 --- a/ocaml/xenopsd/lib/xenops_server_simulator.ml +++ b/ocaml/xenopsd/lib/xenops_server_simulator.ml @@ -547,7 +547,8 @@ module VM = struct let remove _vm = () - let create _ memory_limit vm _ _ = with_lock m (create_nolock memory_limit vm) + let create _ memory_limit vm _ _ _ _ = + with_lock m (create_nolock memory_limit vm) let destroy _ vm = with_lock m (destroy_nolock vm) diff --git a/ocaml/xenopsd/lib/xenops_utils.ml b/ocaml/xenopsd/lib/xenops_utils.ml index 481ad1b610..53dc73709a 100644 --- a/ocaml/xenopsd/lib/xenops_utils.ml +++ b/ocaml/xenopsd/lib/xenops_utils.ml @@ -227,11 +227,13 @@ module MemFS = struct match (path, fs) with | [], Dir d -> d - | p :: ps, Dir d -> - if 
StringMap.mem p !d then
-              aux ps (StringMap.find p !d)
-            else
+        | p :: ps, Dir d -> (
+          match StringMap.find_opt p !d with
+          | Some x ->
+              aux ps x
+          | None ->
               raise Not_dir
+        )
         | _, Leaf _ ->
             raise Not_dir
       in
@@ -285,14 +287,13 @@ module MemFS = struct
       (fun p ->
         let dir = dir_locked (dirname p) in
         let deletable =
-          if StringMap.mem (filename p) !dir then
-            match StringMap.find (filename p) !dir with
-            | Dir child ->
-                StringMap.is_empty !child
-            | Leaf _ ->
-                true
-          else
-            false
+          match StringMap.find_opt (filename p) !dir with
+          | Some (Dir child) ->
+              StringMap.is_empty !child
+          | Some (Leaf _) ->
+              true
+          | None ->
+              false
         in
         if deletable then dir := StringMap.remove (filename p) !dir
       )
diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml
index 287c1c77b2..e4bca3a283 100644
--- a/ocaml/xenopsd/xc/domain.ml
+++ b/ocaml/xenopsd/xc/domain.ml
@@ -269,7 +269,8 @@ let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib
   in
   wait 0
 
-let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept =
+let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept
+    num_of_vbds num_of_vifs =
   let open Xenctrl in
 
   let host_info = Xenctrl.physinfo xc in
@@ -385,12 +386,80 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept =
       ; max_evtchn_port= -1
       ; max_grant_frames=
           ( try int_of_string (List.assoc "max_grant_frames" vm_info.platformdata)
-            with _ -> 64
+            with _ ->
+              let max_per_vif = 8 in
+              (* One VIF takes at most (256 rx entries + 256 tx entries)
+                 * 8 queues * 8 bytes per grant table entry / 4096 bytes per
+                 frame = 8 frames *)
+              let reasonable_per_vbd = 1 in
+              (* (1 grant ref for the ring page itself + 32 requests
+                 * 11 grant refs contained in each) * 8 bytes per grant table
+                 entry / 4096 bytes per frame = ~0.69, rounded up to 1 frame *)
+              let frames_number =
+                max 64
+                  ((max_per_vif * (num_of_vifs + 1))
+                  + (reasonable_per_vbd * (num_of_vbds + 1))
+                  )
+              in
+              debug "estimated max_grant_frames = %d" frames_number ;
+              frames_number
+              (* i.e. max_per_vif * (num_of_vifs + 1 future hotplugged one) +
+                 reasonable_per_vbd * (num_of_vbds + 1 future hotplugged one) *)
+
+              (* NOTE: While the VIF calculation is precise, the VBD one is a
+                 very rough approximation of a reasonable value of
+                 RING_SIZE * MAX_SEGMENTS_PER_REQUEST + PAGES_FOR_RING_ITSELF.
+                 The following points give a rough idea of why a better
+                 estimate is hard:
+
+                 1) The blkfront driver can consume different numbers of grant
+                 pages depending on the features advertised by the backend
+                 driver (and negotiated with it). These features can differ
+                 per VBD, and right now aren't even known at the time of
+                 domain creation. They include:
+                 1.1) indirect segments - a request can carry up to
+                 BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST indirect grants, and
+                 each of these frames in turn contains
+                 GRANTS_PER_INDIRECT_FRAME grants (stored as
+                 blkif_request_segment).
+                 In practice this makes the worst case explode - rather than
+                 trying to detect whether indirect requests are enabled, we
+                 should turn them off to keep the estimate reasonable.
+                 1.2) persistent grants - an optimization, so it shouldn't
+                 change the calculation; in the worst case none of the grants
+                 are persistent.
+                 1.3) multi-page rings - these change the RING_SIZE, but not
+                 in a trivial manner (see ring-page-order).
+                 1.4) multi-queue - this changes the number of rings, adding
+                 another multiplier.
+                 2) The "8 bytes" multiplier for a grant table entry only
+                 applies to v1 grants; v2 grants take up 16 bytes per entry,
+                 and it is currently impossible to detect which version a
+                 domain will use.
+                 3) A dynamically sized grant table could be a solution, but
+                 one used to exist, caused a lot of XSAs, and is hard to get
+                 right.
+                 4) Drivers might need to be limited more explicitly in how
+                 many pages they can consume.
+                 5) The VBD backend's features should be managed by XAPI on
+                 the object itself, so that their maximum bound is known at
+                 the time of domain creation.
+
+                 So for this estimate there is only 1 ring, which takes 1
+                 page, with 32 entries, and each entry (request) can take up
+                 to 11 pages (excluding indirect pages and other
+                 complications).
+
+                 SEE: xen-blkfront.c, blkif.h, and the backend driver for the
+                 negotiation process (visible in xenstore, and afterwards in
+                 kernel module parameters under the sys filesystem)
+              *)
           )
       ; max_maptrack_frames=
           ( try
               int_of_string
                 (List.assoc "max_maptrack_frames" vm_info.platformdata)
-            with _ -> 1024
+            with _ ->
+              0
+              (* This should be >0 only for driver domains (Dom0's startup is
+                 not handled by the toolstack), and such domains currently do
+                 not exist. To support them in the future, xenopsd would need
+                 to check what type of domain is being started.
+              *)
           )
       ; max_grant_version=
           (if List.mem CAP_Gnttab_v2 host_info.capabilities then 2 else 1)
diff --git a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli
index 4fac8ccde5..a768182702 100644
--- a/ocaml/xenopsd/xc/domain.mli
+++ b/ocaml/xenopsd/xc/domain.mli
@@ -149,6 +149,8 @@ val make :
   -> [`VM] Uuidx.t
   -> string option
   -> bool (* no_sharept *)
+  -> int (* num_of_vbds *)
+  -> int (* num_of_vifs *)
   -> domid
 (** Create a fresh (empty) domain with a specific UUID, returning the domain ID *)
diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml
index 61e5d45fb8..18383a04c0 100644
--- a/ocaml/xenopsd/xc/xenops_server_xen.ml
+++ b/ocaml/xenopsd/xc/xenops_server_xen.ml
@@ -1389,7 +1389,8 @@ module VM = struct
     in
     (device_id, revision)
 
-  let create_exn task memory_upper_bound vm final_id no_sharept =
+  let create_exn task memory_upper_bound vm final_id no_sharept num_of_vbds
+      num_of_vifs =
     let k = vm.Vm.id in
     with_xc_and_xs (fun xc xs ->
         (* Ensure the DB contains something for this VM - this is to avoid a
@@ -1518,7 +1519,8 @@ module VM = struct
         let create_info = generate_create_info ~xs vm persistent in
         let domid =
           Domain.make ~xc ~xs create_info vm.vcpu_max domain_config
-            (uuid_of_vm vm) final_id no_sharept
+            (uuid_of_vm vm) final_id no_sharept num_of_vbds
+            num_of_vifs
         in
         Mem.transfer_reservation_to_domain dbg domid reservation_id ;
         let initial_target =
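
Reviewer note (not part of the patch): a minimal standalone sketch of the
arithmetic behind the new max_grant_frames default. The helper name
estimate_max_grant_frames and the sample device counts are invented for this
illustration; the real logic lives inline in Domain.make above.

    (* One VIF: (256 rx + 256 tx) ring entries * 8 queues max * 8 bytes per
       v1 grant table entry / 4096 bytes per grant-table frame = 8 frames. *)
    let max_per_vif = (256 + 256) * 8 * 8 / 4096

    (* One VBD: (1 grant ref for the ring page + 32 requests * 11 segments
       each) * 8 bytes / 4096 bytes per frame = ~0.69, rounded up to 1. *)
    let reasonable_per_vbd = 1

    (* The old fixed default of 64 is kept as a floor, and one extra VIF and
       one extra VBD are budgeted for a future hotplug of each type. *)
    let estimate_max_grant_frames ~num_of_vbds ~num_of_vifs =
      max 64
        ((max_per_vif * (num_of_vifs + 1))
        + (reasonable_per_vbd * (num_of_vbds + 1))
        )

    let () =
      (* 3 VBDs, 7 VIFs: 8 * 8 + 1 * 4 = 68 frames, just above the floor. *)
      assert (estimate_max_grant_frames ~num_of_vbds:3 ~num_of_vifs:7 = 68) ;
      (* All 16 VIF devices now allowed (0-15): 8 * 17 + 1 * 4 = 140 frames,
         more than double the old fixed default of 64. *)
      assert (estimate_max_grant_frames ~num_of_vbds:3 ~num_of_vifs:16 = 140)

This is also why raising the allowed VIF device range to 0-15 needs the
estimate: by the per-VIF figure above, a fully populated VM would exhaust a
fixed 64-frame grant table on VIFs alone (16 * 8 = 128 frames).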