
Commit 9ad7e97 (parent 38e6571)

add callbacks VI and training; fix docstrings
2 files changed, +43 -11 lines

src/inference/train.jl (21 additions, 5 deletions)
@@ -3,17 +3,29 @@
     update::ParamUpdate,
     num_epoch, epoch_size, num_minibatch, minibatch_size; verbose::Bool=false)
 
-Train the given generative function to maximize the expected conditional log probability (density) that `gen_fn` generates the assignment `constraints` given inputs, where the expectation is taken under the output distribution of `data_generator`.
+Train the given generative function to maximize the expected conditional log
+probability (density) that `gen_fn` generates the assignment `constraints`
+given inputs, where the expectation is taken under the output distribution of
+`data_generator`.
+
+The function `data_generator` is a function of no arguments that returns a
+tuple `(inputs, constraints)` where `inputs` is a `Tuple` of inputs (arguments)
+to `gen_fn`, and `constraints` is a `ChoiceMap`.
 
-The function `data_generator` is a function of no arguments that returns a tuple `(inputs, constraints)` where `inputs` is a `Tuple` of inputs (arguments) to `gen_fn`, and `constraints` is an `ChoiceMap`.
 `conf` configures the optimization algorithm used.
+
 `param_lists` is a map from generative function to lists of its parameters.
-This is equivalent to minimizing the expected KL divergence from the conditional distribution `constraints | inputs` of the data generator to the distribution represented by the generative function, where the expectation is taken under the marginal distribution on `inputs` determined by the data generator.
+This is equivalent to minimizing the expected KL divergence from the
+conditional distribution `constraints | inputs` of the data generator to the
+distribution represented by the generative function, where the expectation is
+taken under the marginal distribution on `inputs` determined by the data
+generator.
 """
 function train!(gen_fn::GenerativeFunction, data_generator::Function,
         update::ParamUpdate;
         num_epoch=1, epoch_size=1, num_minibatch=1, minibatch_size=1,
-        evaluation_size=epoch_size, verbose=false)
+        evaluation_size=epoch_size, verbose=false,
+        callback=(epoch, minibatch, minibatch_objective) -> nothing)
 
     history = Vector{Float64}(undef, num_epoch)
     for epoch=1:num_epoch
@@ -37,11 +49,15 @@ function train!(gen_fn::GenerativeFunction, data_generator::Function,
             minibatch_idx = permuted[1:minibatch_size]
             minibatch_inputs = epoch_inputs[minibatch_idx]
             minibatch_choice_maps = epoch_choice_maps[minibatch_idx]
+            minibatch_objective = 0.0
             for (inputs, constraints) in zip(minibatch_inputs, minibatch_choice_maps)
-                (trace, _) = generate(gen_fn, inputs, constraints)
+                (trace, weight) = generate(gen_fn, inputs, constraints)
+                minibatch_objective += weight
                 accumulate_param_gradients!(trace)
            end
            apply!(update)
+            minibatch_objective /= minibatch_size
+            callback(epoch, minibatch, minibatch_objective)
        end
 
        # evaluate score on held out data
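To illustrate the new hook: a minimal sketch of calling `train!` with a callback that records each minibatch objective (the average `generate` weight over the minibatch, per the loop above). The toy model, data generator, and update configuration here are assumptions for the example, not part of this commit.

using Gen

# Hypothetical model with one trainable parameter `mu` (illustration only).
@gen function model()
    @param mu::Float64
    @trace(normal(mu, 1.0), :y)
end
init_param!(model, :mu, 0.0)

# Data generator: returns (inputs, constraints); inputs are empty here.
function data_generator()
    y = 3.0 + randn()            # pretend the data comes from N(3, 1)
    ((), choicemap((:y, y)))
end

update = ParamUpdate(FixedStepGradientDescent(0.01), model)

# Record the per-minibatch objective via the new callback keyword.
objectives = Float64[]
train!(model, data_generator, update;
       num_epoch=10, epoch_size=100, num_minibatch=10, minibatch_size=10,
       callback=(epoch, minibatch, objective) -> push!(objectives, objective))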

src/inference/variational.jl (22 additions, 6 deletions)
@@ -94,16 +94,20 @@ end
     observations::ChoiceMap,
     var_model::GenerativeFunction, var_model_args::Tuple,
     update::ParamUpdate;
-    iters=1000, samples_per_iter=100, verbose=false)
+    iters=1000, samples_per_iter=100, verbose=false,
+    callback=(iter, traces, elbo_estimate) -> nothing)
 
-Fit the parameters of a generative function (`var_model`) to the posterior distribution implied by the given model and observations using stochastic gradient methods.
+Fit the parameters of a generative function (`var_model`) to the posterior
+distribution implied by the given model and observations using stochastic
+gradient methods.
 """
 function black_box_vi!(
     model::GenerativeFunction, model_args::Tuple,
     observations::ChoiceMap,
     var_model::GenerativeFunction, var_model_args::Tuple,
     update::ParamUpdate;
-    iters=1000, samples_per_iter=100, verbose=false)
+    iters=1000, samples_per_iter=100, verbose=false,
+    callback=(iter, traces, elbo_estimate) -> nothing)
 
     traces = Vector{Any}(undef, samples_per_iter)
     elbo_history = Vector{Float64}(undef, iters)
@@ -126,6 +130,9 @@ function black_box_vi!(
         # print it
         verbose && println("iter $iter; est objective: $elbo_estimate")
 
+        # callback
+        callback(iter, traces, elbo_estimate)
+
         # do an update
         apply!(update)
     end
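For the variational routine, a similar sketch: the callback receives the iteration number, the traces sampled from `var_model` that iteration, and the ELBO estimate. The model and variational family below are hypothetical stand-ins, not code from this commit.

using Gen

# Hypothetical model and variational family (illustration only).
@gen function model()
    x = @trace(normal(0.0, 1.0), :x)
    @trace(normal(x, 0.1), :y)
end

@gen function var_model()
    @param mu::Float64
    @param log_std::Float64
    @trace(normal(mu, exp(log_std)), :x)
end
init_param!(var_model, :mu, 0.0)
init_param!(var_model, :log_std, 0.0)

observations = choicemap((:y, 2.0))
update = ParamUpdate(FixedStepGradientDescent(0.001), var_model)

# Log the ELBO estimate each iteration through the new callback.
elbo_log = Float64[]
black_box_vi!(model, (), observations, var_model, (), update;
              iters=200, samples_per_iter=50,
              callback=(iter, traces, elbo) -> push!(elbo_log, elbo))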
@@ -139,17 +146,23 @@ end
     observations::ChoiceMap,
     var_model::GenerativeFunction, var_model_args::Tuple,
     update::ParamUpdate, num_samples::Int;
-    iters=1000, samples_per_iter=100, verbose=false)
+    iters=1000, samples_per_iter=100, verbose=false,
+    callback=(iter, traces, elbo_estimate) -> nothing)
 
-Fit the parameters of a generative function (`var_model`) to the posterior distribution implied by the given model and observations using stochastic gradient methods applied to the [Variational Inference with Monte Carlo Objectives](https://arxiv.org/abs/1602.06725) lower bound on the marginal likelihood.
+Fit the parameters of a generative function (`var_model`) to the posterior
+distribution implied by the given model and observations using stochastic
+gradient methods applied to the [Variational Inference with Monte Carlo
+Objectives](https://arxiv.org/abs/1602.06725) lower bound on the marginal
+likelihood.
 """
 function black_box_vimco!(
     model::GenerativeFunction, model_args::Tuple,
     observations::ChoiceMap,
     var_model::GenerativeFunction, var_model_args::Tuple,
     update::ParamUpdate, num_samples::Int;
     iters=1000, samples_per_iter=100, verbose=false,
-    geometric=true)
+    geometric=true,
+    callback=(iter, traces, elbo_estimate) -> nothing)
 
     traces = Vector{Any}(undef, samples_per_iter)
     iwelbo_history = Vector{Float64}(undef, iters)
@@ -172,6 +185,9 @@ function black_box_vimco!(
         # print it
         verbose && println("iter $iter; est objective: $iwelbo_estimate")
 
+        # callback
+        callback(iter, traces, iwelbo_estimate)
+
         # do an update
         apply!(update)
     end
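`black_box_vimco!` gains the same hook; the only signature differences are the positional `num_samples` argument and the `geometric` keyword, and the callback receives the IWELBO estimate in place of the ELBO. Continuing the hypothetical sketch above:

# Same toy model and variational family as before, now with VIMCO
# using 10 samples per objective estimate.
black_box_vimco!(model, (), observations, var_model, (), update, 10;
                 iters=200, samples_per_iter=50, geometric=true,
                 callback=(iter, traces, iwelbo) -> push!(elbo_log, iwelbo))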
