diff --git a/examples/gym-cartpole/smart_agent.zls b/examples/gym-cartpole/smart_agent.zls index dd1c160..a51e44a 100644 --- a/examples/gym-cartpole/smart_agent.zls +++ b/examples/gym-cartpole/smart_agent.zls @@ -27,9 +27,7 @@ let proba model obs_gym = action where rec obs = simple_pendulum (obs_gym, (Right fby action)) and action = controller (obs) and () = Infer_pf.factor (-10. *. (abs_float (obs.pole_angle))) - and display = draw_obs_back obs let node smart_main () = () where - rec reset action = Infer_pf.plan 10 10 model obs every true + rec reset action = Infer_pf.plan_pf 30 10 10 model obs every true and obs, _, stop = cart_pole_gym true (Right fby action) - and display = draw_obs_front obs diff --git a/examples/gym-cartpole/smart_pid_agent.zls b/examples/gym-cartpole/smart_pid_agent.zls index b90c1d2..702b61e 100644 --- a/examples/gym-cartpole/smart_pid_agent.zls +++ b/examples/gym-cartpole/smart_pid_agent.zls @@ -51,7 +51,6 @@ let proba smart_model (obs_gym) = action where rec obs = simple_pendulum (obs_gym, (Right fby action)) and action = smart_controller (obs) and () = Infer_pf.factor (-10. *. 
(abs_float (obs.pole_angle))) - and display = draw_obs_back obs (** PID controller for the cart-pole example **) @@ -72,11 +71,10 @@ let proba pid_model (obs, ctrl_action) = p, (i, d) where let node smart_pid_main () = () where - rec reset action_smart = Infer_pf.plan 10 10 smart_model obs_smart + rec reset action_smart = Infer_pf.plan_pf 30 10 10 smart_model obs_smart every true and obs_smart, _, _ = cart_pole_gym true (Right fby action_smart) and pid_dist = Infer_pf.infer 1000 pid_model (obs_smart, action_smart) - and () = draw_obs_front obs_smart and (p, (i, d)) = Distribution.draw pid_dist and obs, _, stop = cart_pole_gym true (Right fby action) and reset action = pid_controller (obs.pole_angle, (p, i, d)) diff --git a/probzelus/inference/infer_pf.ml b/probzelus/inference/infer_pf.ml index ba8ac98..cf2eeca 100644 --- a/probzelus/inference/infer_pf.ml +++ b/probzelus/inference/infer_pf.ml @@ -26,10 +26,11 @@ open Ztypes open Owl +open Printf type pstate = { - idx : int; (** particle index *) - scores : float array; (** score of each particle *) + idx : int; (** particle index *) + scores : float array; (** score of each particle *) } type prob = pstate @@ -41,37 +42,28 @@ let factor = let alloc () = () in let reset _state = () in let copy _src _dst = () in - let step _state input = - factor' input - in - Cnode { alloc; reset; copy; step; } + let step _state input = factor' input in + Cnode { alloc; reset; copy; step } -let observe' (pstate, (d, v)) = - factor' (pstate, Distribution.score(d, v)) +let observe' (pstate, (d, v)) = factor' (pstate, Distribution.score (d, v)) let observe = let alloc () = () in let reset _state = () in let copy _src _dst = () in - let step _state input = - observe' input - in - Cnode { alloc; reset; copy; step; } + let step _state input = observe' input in + Cnode { alloc; reset; copy; step } -let sample' (_pstate, dist) = - Distribution.draw dist +let sample' (_pstate, dist) = Distribution.draw dist let sample = let alloc () = () in 
let reset _state = () in let copy _src _dst = () in - let step _state input = - sample' input - in - Cnode { alloc; reset; copy; step; } + let step _state input = sample' input in + Cnode { alloc; reset; copy; step } let id x = x - let const = id let add (x, y) = x +. y let ( +~ ) = ( +. ) @@ -83,7 +75,6 @@ let pair = id let array = id let lst = id let matrix = id - let mat_add (x, y) = Mat.add x y let ( +@~ ) = Mat.( + ) let mat_scalar_mult (a, x) = Mat.mul_scalar x a @@ -91,13 +82,9 @@ let ( $*~ ) = Mat.( $* ) let mat_dot (x, y) = Mat.dot x y let ( *@~ ) = Mat.( *@ ) let vec_get (x, i) = Mat.get x i 0 - let eval = id -type 'a infer_state = { - infer_states : 'a array; - infer_scores : float array; -} +type 'a infer_state = { infer_states : 'a array; infer_scores : float array } (** [infer nb_particles f (b, i)] val infer : @@ -113,8 +100,10 @@ type 'a infer_state = { *) let infer_subresample n (Cnode { alloc; reset; copy; step }) = let alloc () = - { infer_states = Array.init n (fun _ -> alloc ()); - infer_scores = Array.make n 0.0; } + { + infer_states = Array.init n (fun _ -> alloc ()); + infer_scores = Array.make n 0.0; + } in let reset state = Array.iter reset state.infer_states; @@ -124,15 +113,14 @@ let infer_subresample n (Cnode { alloc; reset; copy; step }) = let values = Array.mapi (fun i state -> - let value = step state ({ idx = i; scores = scores; }, input) in - value) + let value = step state ({ idx = i; scores }, input) in + value) states in let probabilities, ret = Normalize.normalize_nohist values scores in - if c then begin + if c then ( Normalize.resample copy n probabilities states; - Array.fill scores 0 n 0.0 - end; + Array.fill scores 0 n 0.0); ret in let copy src dst = @@ -141,16 +129,17 @@ let infer_subresample n (Cnode { alloc; reset; copy; step }) = dst.infer_scores.(i) <- src.infer_scores.(i) done in - Cnode { alloc = alloc; reset = reset; copy = copy; step = step } - + Cnode { alloc; reset; copy; step } (** [infer_ess_resample 
nb_particles threshold f i] inference with resampling when the effective sample size goes below [threshold]. *) let infer_ess_resample n threshold (Cnode { alloc; reset; copy; step }) = let alloc () = - { infer_states = Array.init n (fun _ -> alloc ()); - infer_scores = Array.make n 0.0; } + { + infer_states = Array.init n (fun _ -> alloc ()); + infer_scores = Array.make n 0.0; + } in let reset state = Array.iter reset state.infer_states; @@ -158,29 +147,24 @@ let infer_ess_resample n threshold (Cnode { alloc; reset; copy; step }) = in let do_resampling scores = let norm = Normalize.log_sum_exp scores in - let scores' = Array.map (fun score -> (score -. norm)) scores in - let num = - (Array.fold_right (fun x acc -> exp x +. acc) scores' 0.) ** 2. - in - let den = - Array.fold_right (fun x acc -> (exp x) ** 2. +. acc) scores' 0. - in + let scores' = Array.map (fun score -> score -. norm) scores in + let num = Array.fold_right (fun x acc -> exp x +. acc) scores' 0. ** 2. in + let den = Array.fold_right (fun x acc -> (exp x ** 2.) +. acc) scores' 0. in let ess = num /. den in - ess < threshold *. (float_of_int n) + ess < threshold *. 
float_of_int n in - let step { infer_states = states; infer_scores = scores } (input) = + let step { infer_states = states; infer_scores = scores } input = let values = Array.mapi (fun i state -> - let value = step state ({ idx = i; scores = scores; }, input) in - value) + let value = step state ({ idx = i; scores }, input) in + value) states in let probabilities, ret = Normalize.normalize_nohist values scores in - if do_resampling scores then begin + if do_resampling scores then ( Normalize.resample copy n probabilities states; - Array.fill scores 0 n 0.0 - end; + Array.fill scores 0 n 0.0); ret in let copy src dst = @@ -189,81 +173,65 @@ let infer_ess_resample n threshold (Cnode { alloc; reset; copy; step }) = dst.infer_scores.(i) <- src.infer_scores.(i) done in - Cnode { alloc = alloc; reset = reset; copy = copy; step = step } + Cnode { alloc; reset; copy; step } let infer n node = - let Cnode { alloc; reset; copy; step } = infer_subresample n node in - Cnode { alloc; - reset; - copy; - step = (fun state input -> step state (true, input)); } - + let (Cnode { alloc; reset; copy; step }) = infer_subresample n node in + Cnode + { alloc; reset; copy; step = (fun state input -> step state (true, input)) } let infer_noresample n node = - let Cnode { alloc; reset; copy; step } = infer_subresample n node in - Cnode { alloc; - reset; - copy; - step = (fun state input -> step state (false, input)); } - + let (Cnode { alloc; reset; copy; step }) = infer_subresample n node in + Cnode + { + alloc; + reset; + copy; + step = (fun state input -> step state (false, input)); + } (* [memoize_step f x] is functionally equivalent to [f x] but stores *) (* all the pairs (state, input) and the associated result *) let memoize_step step (s, table) x = - try - Hashtbl.find table (s, x) - with - | Not_found -> - let sc = Probzelus_utils.copy s in - let o = step s x in - Hashtbl.add table (sc, x) o; - o + try Hashtbl.find table (s, x) + with Not_found -> + let sc = Probzelus_utils.copy s 
in + let o = step s x in + Hashtbl.add table (sc, x) o; + o let expectation scores = - let s = Array.fold_left (+.) 0. scores in + let s = Array.fold_left ( +. ) 0. scores in s /. float (Array.length scores) - +(** [plan_step n k model_step model_copy] returns a function [step] that duplicates the current particle [n] times and advances it forward [k] times. *) let plan_step n k model_step model_copy = let table = Hashtbl.create 7 in let rec expected_utility (state, score) (ttl, input) = if ttl < 1 then score else - let states = Array.init n (fun _ -> Probzelus_utils.copy state) in - let scores = Array.make n 0.0 in - let score' = - Array.iteri - (fun i state -> - let _ = model_step state ({ idx = i; scores = scores; }, input) in - let score = scores.(i) in - let eu = - memoize_step - expected_utility ((state, score), table) (ttl - 1, input) - in - scores.(i) <- eu) - states; - (* let norm = Normalize.log_sum_exp scores in *) - (* let probabilities = Array.map (fun score -> exp (score -. norm)) scores in *) - (* let scores' = Array.copy scores in *) - (* Normalize.resample model_copy n probabilities states; *) - (* Array.fill scores 0 n 0.0; *) - (* expectation scores' *) - expectation scores + let state = Probzelus_utils.copy state in + + let scores = [| 0. |] in + let _ = model_step state ({ idx = 0; scores }, input) in + let score' = scores.(0) in + let eu = + memoize_step expected_utility ((state, score'), table) (ttl - 1, input) in - score +. score' + score +. 
eu in let state_value_copy (src_st, src_val) (dst_st, dst_val) = model_copy src_st dst_st; dst_val := !src_val in - let step { infer_states = states; infer_scores = scores; } input = + let step { infer_states = states; infer_scores = scores } input = let values = Array.mapi (fun i state -> - let value = model_step state ({ idx = i; scores = scores; }, input) in - let score = scores.(i) in - scores.(i) <- expected_utility (state, score) (k, input); - value) + let value = model_step state ({ idx = i; scores }, input) in + let score = scores.(i) in + scores.(i) <- expected_utility (state, score) (k, input); + value) states in let states_values = @@ -278,8 +246,49 @@ let plan_step n k model_step model_copy = in step -(* [plan n k f x] runs n instances of [f] on the input stream *) -(* [x] but at each step, do a prediction of depth k *) +(** [plan_step_pf n h k model_step model_copy] returns a function [step] that duplicates the current particle [n] times, advances it forward, copies it [h] times, applies a particle filter of size [h], and repeats this process [k] times. *) +let plan_step_pf n h k model_step model_copy = + let table = Hashtbl.create 7 in + let rec expected_utility state (ttl, input) = + let states = Array.init h (fun _ -> Probzelus_utils.copy state) in + let scores = Array.make h 0.0 in + Array.iteri + (fun i state -> ignore @@ model_step state ({ idx = i; scores }, input)) + states; + let norm = Normalize.log_sum_exp scores in + let probabilities = Array.map (fun score -> exp (score -. norm)) scores in + let dist = Normalize.to_distribution (Array.init h id) probabilities in + let index = Distribution.draw dist in + let state, score = (states.(index), scores.(index)) in + if ttl < 1 then score else norm +. 
expected_utility state (ttl - 1, input) + in + let state_value_copy (src_st, src_val) (dst_st, dst_val) = + model_copy src_st dst_st; + dst_val := !src_val + in + let step { infer_states = states; infer_scores = scores } input = + let values = + Array.mapi + (fun i state -> + let value = model_step state ({ idx = i; scores }, input) in + scores.(i) <- expected_utility state (k, input); + value) + states + in + let states_values = + Array.mapi (fun i state -> (state, ref values.(i))) states + in + let norm = Normalize.log_sum_exp scores in + let probabilities = Array.map (fun score -> exp (score -. norm)) scores in + Normalize.resample state_value_copy n probabilities states_values; + Array.fill scores 0 n 0.0; + Hashtbl.clear table; + states_values + in + step + +(** [plan n k f x] runs n instances of [f] on the input stream + [x] but at each step, do a prediction of depth [k] *) let plan n k (Cnode model : (pstate * 't1, 't2) Ztypes.cnode) = let alloc () = ref (model.alloc ()) in let reset state = model.reset !state in @@ -289,24 +298,46 @@ let plan n k (Cnode model : (pstate * 't1, 't2) Ztypes.cnode) = let states = Array.init n (fun _ -> Probzelus_utils.copy !plan_state) in let scores = Array.make n 0.0 in let states_values = - step_body { infer_states = states; infer_scores = scores; } input + step_body { infer_states = states; infer_scores = scores } input in let dist = Normalize.normalize states_values in let state', value = Distribution.draw dist in plan_state := state'; !value in - Cnode { alloc = alloc; reset = reset; copy = copy; step = step } + Cnode { alloc; reset; copy; step } +(** [plan n k f x] runs n instances of [f] on the input stream + [x] but at each step, do a prediction of depth [k] and use a particle filter of size [h] *) +let plan_pf n h k (Cnode model : (pstate * 't1, 't2) Ztypes.cnode) = + let alloc () = ref (model.alloc ()) in + let reset state = model.reset !state in + let copy src dst = model.copy !src !dst in + let step_body = 
plan_step_pf n h k model.step model.copy in + let step plan_state input = + let states = Array.init n (fun _ -> Probzelus_utils.copy !plan_state) in + let scores = Array.make n 0.0 in + let states_values = + step_body { infer_states = states; infer_scores = scores } input + in + let dist = Normalize.normalize states_values in + let state', value = Distribution.draw dist in + plan_state := state'; + !value + in + Cnode { alloc; reset; copy; step } -type 'state infd_state = - { infd_states : 'state array; - infd_scores : float array; } +type 'state infd_state = { + infd_states : 'state array; + infd_scores : float array; +} let infer_depth n k (Cnode model) = let alloc () = - { infd_states = Array.init n (fun _ -> model.alloc ()); - infd_scores = Array.make n 0.0; } + { + infd_states = Array.init n (fun _ -> model.alloc ()); + infd_scores = Array.make n 0.0; + } in let reset state = Array.iter model.reset state.infd_states; @@ -320,70 +351,68 @@ let infer_depth n k (Cnode model) = in let step infd_state input = let states_values = - plan_step n k - model.step model.copy - { infer_states = infd_state.infd_states; - infer_scores = infd_state.infd_scores; } input + plan_step n k model.step model.copy + { + infer_states = infd_state.infd_states; + infer_scores = infd_state.infd_scores; + } + input in let values = Array.map (fun (_, v) -> !v) states_values in Normalize.normalize values in - Cnode { alloc = alloc; reset = reset; copy = copy; step = step } + Cnode { alloc; reset; copy; step } - -let hybrid_infer_subresample n m (cstate: Ztypes.cstate) = - let Cnode { alloc; step; reset; copy; } = m cstate in +let hybrid_infer_subresample n m (cstate : Ztypes.cstate) = + let (Cnode { alloc; step; reset; copy }) = m cstate in let hstep self (prob, (c, (t, x))) = step self (t, (prob, (c, x))) in - let Cnode { alloc; step; reset; copy; } = - infer_subresample n (Cnode { alloc; step=hstep; reset; copy; }) + let (Cnode { alloc; step; reset; copy }) = + infer_subresample n (Cnode 
{ alloc; step = hstep; reset; copy }) in - Cnode { - alloc; - step = (fun s (t, ((_, c), x)) -> step s (c, (t, x))); - reset; - copy; } - -let hybrid_infer n m (cstate: Ztypes.cstate) = - let Cnode { alloc; step; reset; copy; } = m cstate in + Cnode + { + alloc; + step = (fun s (t, ((_, c), x)) -> step s (c, (t, x))); + reset; + copy; + } + +let hybrid_infer n m (cstate : Ztypes.cstate) = + let (Cnode { alloc; step; reset; copy }) = m cstate in let hstep self (prob, (t, x)) = step self (t, (prob, x)) in - let Cnode { alloc; step; reset; copy; } = - infer_subresample n (Cnode { alloc; step=hstep; reset; copy; }) + let (Cnode { alloc; step; reset; copy }) = + infer_subresample n (Cnode { alloc; step = hstep; reset; copy }) in - Cnode { alloc; step = (fun s x -> step s (cstate.major, x)); reset; copy; } + Cnode { alloc; step = (fun s x -> step s (cstate.major, x)); reset; copy } -let hybrid_infer_ess_resample n threshold m (cstate: Ztypes.cstate) = - let Cnode { alloc; step; reset; copy; } = m cstate in +let hybrid_infer_ess_resample n threshold m (cstate : Ztypes.cstate) = + let (Cnode { alloc; step; reset; copy }) = m cstate in let hstep self (prob, (t, x)) = step self (t, (prob, x)) in - infer_ess_resample n threshold (Cnode { alloc; step=hstep; reset; copy; }) + infer_ess_resample n threshold (Cnode { alloc; step = hstep; reset; copy }) (** [gen f x] generates a value sampled from the model [f] with input [x] and its corresponding score. The score is reseted at each instant and does not tack into account the likelihood and samples. 
*) let gen (Cnode { alloc; reset; copy; step }) = - let alloc () = - { infer_states = [| alloc () |]; - infer_scores = [| 0.0 |]; } - in + let alloc () = { infer_states = [| alloc () |]; infer_scores = [| 0.0 |] } in let reset state = reset state.infer_states.(0); state.infer_scores.(0) <- 0.0 in let step { infer_states = states; infer_scores = scores } input = - let value = step states.(0) ({ idx = 0; scores = scores; }, input) in + let value = step states.(0) ({ idx = 0; scores }, input) in let score = scores.(0) in scores.(0) <- 0.; - value, score + (value, score) in let copy src dst = copy src.infer_states.(0) dst.infer_states.(0); dst.infer_scores.(0) <- src.infer_scores.(0) in - Cnode { alloc = alloc; reset = reset; copy = copy; step = step } + Cnode { alloc; reset; copy; step } -let hybrid_gen m (cstate: Ztypes.cstate) = - let Cnode { alloc; step; reset; copy; } = - m cstate - in +let hybrid_gen m (cstate : Ztypes.cstate) = + let (Cnode { alloc; step; reset; copy }) = m cstate in let hstep self (prob, (t, x)) = step self (t, (prob, x)) in - gen (Cnode { alloc; step=hstep; reset; copy; }) + gen (Cnode { alloc; step = hstep; reset; copy }) diff --git a/probzelus/inference/infer_pf.zli b/probzelus/inference/infer_pf.zli index 230188f..aa246a7 100644 --- a/probzelus/inference/infer_pf.zli +++ b/probzelus/inference/infer_pf.zli @@ -59,6 +59,11 @@ val plan : ('t1 ~D~> 't2) -S-> 't1 -D-> 't2 +val plan_pf : + int -S-> int -S-> int -S-> + ('t1 ~D~> 't2) -S-> + 't1 -D-> 't2 + val infer_depth : int -S-> int -S-> ('t1 ~D~> 't2) -S->