Skip to content

Commit c7545fe

Browse files
authored
Merge pull request #13 from MurrellGroup/OUflow
Adding OUflow, and example
2 parents 058b94f + 0e21437 commit c7545fe

File tree

5 files changed

+103
-2
lines changed

5 files changed

+103
-2
lines changed

examples/OU_cat.jl

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
using Flowfusion, ForwardBackward, Flux, RandomFeatureMaps, Optimisers, Plots
2+
3+
#Set up a Flux model: X̂1 = model(t, Xt)
#Time and state are each lifted with random Fourier features, summed, passed
#through residual dense layers, then decoded back to the state dimension.
struct FModel{A}
    layers::A
end
Flux.@layer FModel

"""
    FModel(; embeddim = 128, spacedim = 2, layers = 3)

Construct the model. `embeddim` is the hidden width, `spacedim` the dimension
of the continuous state, and `layers` the number of residual dense blocks.
"""
function FModel(; embeddim = 128, spacedim = 2, layers = 3)
    embed_time = Chain(RandomFourierFeatures(1 => embeddim, 1f0), Dense(embeddim => embeddim, swish))
    #Fix: the state feature map's input dim now follows `spacedim` (was hard-coded
    #to 2, which disagreed with `decode` whenever spacedim ≠ 2).
    embed_state = Chain(RandomFourierFeatures(spacedim => embeddim, 1f0), Dense(embeddim => embeddim, swish))
    ffs = [Dense(embeddim => embeddim, swish) for _ in 1:layers]
    decode = Dense(embeddim => spacedim)
    #Named tuple avoids shadowing the `layers` keyword argument.
    return FModel((; embed_time, embed_state, ffs, decode))
end
16+
17+
#Forward pass: predict X̂1 from the time `t` and current state `Xt`.
function (f::FModel)(t, Xt)
    nn = f.layers
    state = tensor(Xt)                      # assumes state is (spacedim, batch) — TODO confirm
    texp = expand(t, ndims(state))          # broadcastable time, shared by embed and output scaling
    tmat = zero(state[1:1, :]) .+ texp      # time replicated to a (1, batch) row
    h = nn.embed_time(tmat) .+ nn.embed_state(state)
    for blk in nn.ffs
        h = h .+ blk(h)                     # residual dense blocks
    end
    #Predict X̂1 as Xt plus a correction that is damped as t approaches 1.
    return state .+ nn.decode(h) .* (1.05f0 .- texp)
end
27+
28+
#Endpoint distributions used for training:
T = Float32
sampleX0(n_samples) = rand(T, 2, n_samples) .+ 2
sampleX1(n_samples) = Flowfusion.random_literal_cat(n_samples, sigma = T(0.05))
n_samples = 400

#Loop over three different process settings (θ, variance at t=0, variance at t=1, decay):
for (θ, v_at_0, v_at_1, dec) in [(10f0, 5f0, 0.01f0, -2f0), (2f0, 2f0, 0.1f0, -2f0), (10f0, 2f0, 0.1f0, -2f0)]
    #The process under study:
    P = OUFlow(θ, v_at_0, v_at_1, dec)

    #Fresh model and optimizer per setting:
    eta = 0.001
    model = FModel(embeddim = 256, layers = 3, spacedim = 2)
    opt_state = Flux.setup(AdamW(eta = eta), model)

    #Training loop:
    iters = 4000
    for i in 1:iters
        #Draw a fresh batch of endpoint pairs and times:
        X0 = ContinuousState(sampleX0(n_samples))
        X1 = ContinuousState(sampleX1(n_samples))
        t = 0.999f0 .* rand(T, n_samples)   # keep t strictly below 1
        #Sample the bridge state Xt given both endpoints:
        Xt = bridge(P, X0, X1, t)
        #Gradient step on the flow loss:
        l, g = Flux.withgradient(m -> floss(P, m(t, Xt), X1, scalefloss(P, t)), model)
        Flux.update!(opt_state, model, g[1])
        if i % 10 == 0
            println("i: $i; Loss: $l")
        end
    end

    #Inference: integrate the learned flow from fresh X0 samples, recording paths.
    n_inference_samples = 5000
    X0 = ContinuousState(sampleX0(n_inference_samples))
    paths = Tracker()
    samples = gen(P, X0, model, 0f0:0.005f0:1f0, tracker = paths)

    #Plot start points, a subset of trajectories, true and generated endpoints:
    pl = scatter(X0.state[1,:], X0.state[2,:], msw = 0, ms = 1, color = "blue", alpha = 0.5, size = (400,400), legend = :topleft, label = "X0")
    tvec = stack_tracker(paths, :t)
    xttraj = stack_tracker(paths, :xt)
    for i in 1:50:1000
        plot!(xttraj[1,i,:], xttraj[2,i,:], color = "red", label = i==1 ? "Trajectory" : :none, alpha = 0.4)
    end
    X1true = sampleX1(n_inference_samples)
    scatter!(X1true[1,:], X1true[2,:], msw = 0, ms = 1, color = "orange", alpha = 0.5, label = "X1 (true)")
    scatter!(samples.state[1,:], samples.state[2,:], msw = 0, ms = 1, color = "green", alpha = 0.5, label = "X1 (generated)")
    display(pl)
    savefig("OU_continuous_cat_$P.svg")

    #First-coordinate trajectories against the time axis:
    pl = plot()
    for i in 1:50:1000
        plot!(xttraj[1,i,:], color = "red", alpha = 0.4, label = :none)
    end
    pl
    savefig("OU_continuous_traj_$P.svg")
end

src/Flowfusion.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ export
3535
InterpolatingDiscreteFlow,
3636
NoisyInterpolatingDiscreteFlow,
3737
DoobMatchingFlow,
38+
OUFlow,
3839
MaskedState,
3940
Guide,
4041
tangent_guide,

src/loss.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,17 @@ msu(T) = Union{T, MaskedState{<:T}}
2929

3030
#Continuous-state processes: masked, schedule-scaled MSE between X̂₁ and X₁.
floss(P::fbu(Deterministic), X̂₁, X₁::msu(ContinuousState), c) = scaledmaskedmean(mse(X̂₁, X₁), c, getlmask(X₁))
floss(P::fbu(BrownianMotion), X̂₁, X₁::msu(ContinuousState), c) = scaledmaskedmean(mse(X̂₁, X₁), c, getlmask(X₁))
floss(P::OUFlow, X̂₁, X₁::msu(ContinuousState), c) = scaledmaskedmean(mse(X̂₁, X₁), c, getlmask(X₁)) #Plain OUFlow (no fbu schedule wrapper): direct variance control makes a schedule unnecessary.
floss(P::fbu(ManifoldProcess{<:Euclidean}), X̂₁, X₁::msu(ContinuousState), c) = scaledmaskedmean(mse(X̂₁, X₁), c, getlmask(X₁))
#floss(P::fbu(OrnsteinUhlenbeck), X̂₁, X₁::msu(ContinuousState), c) = scaledmaskedmean(mse(X̂₁, X₁), c, getlmask(X₁)) #<- I'm not sure MSE on X1 works for this process. We need to pull X1 back to Xt and get the generator.

#Discrete processes: X̂₁ is a distribution, so X₁ must be onehot (and onehot
#encoding has to happen before any move to the GPU).
floss(P::fbu(DiscreteProcess), X̂₁, X₁::msu(DiscreteState{<:AbstractArray{<:Integer}}), c) = error("X₁ needs to be onehot encoded with `onehot(X₁)`. You might need to do this before moving it to the GPU.")
floss(P::fbu(DiscreteProcess), X̂₁, X₁::msu(DiscreteState{<:OneHotArray}), c) = scaledmaskedmean(lce(X̂₁, X₁), c, getlmask(X₁))

#Tuples of processes: sum the per-component losses, broadcasting a shared `c`
#or pairing element-wise when `c` is itself a tuple.
floss(P::Tuple, X̂₁::Tuple, X₁::Tuple, c::Union{AbstractArray, Real}) = sum(floss.(P, X̂₁, X₁, (c,)))
floss(P::Tuple, X̂₁::Tuple, X₁::Tuple, c::Tuple) = sum(floss.(P, X̂₁, X₁, c))

#Guided targets: MSE against the guide's tangent H.
floss(P::Union{fbu(ManifoldProcess), fbu(Deterministic)}, ξhat, ξ::Guide, c) = scaledmaskedmean(mse(ξhat, ξ.H), c, getlmask(ξ))


#I should make a self-balancing loss that tracks the running mean/std and adaptively scales to balance against target weights.

########################################################################

src/processes.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,9 @@ function step(P::NoisyInterpolatingDiscreteFlow{<:Integer}, Xₜ::DiscreteState{
129129
clamp!(tensor(newXₜ), 0, Inf)
130130
return rand(newXₜ)
131131
end
132+
133+
#Bridge sampler for OUFlow: build an OU process whose reversion mean is the
#target X1, then draw Xt conditioned on both endpoints.
function bridge(P::OUFlow, X0, X1, t0, t)
    #Note X1 supplied as the mean; `dec` is forwarded as the decay parameter —
    #NOTE(review): exact semantics live in OrnsteinUhlenbeckExpVar, confirm there.
    proc = OrnsteinUhlenbeckExpVar(tensor(X1), P.θ, P.v_at_0, P.v_at_1, dec = P.dec)
    return endpoint_conditioned_sample(X0, X1, proc, t0, t, eltype(t)(1))
end
137+

src/types.jl

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,14 @@ struct NoisyInterpolatingDiscreteFlow{T} <: ConvexInterpolatingDiscreteFlow
6363
dκ₁::Function # derivative of κ₁
6464
dκ₂::Function # derivative of κ₂
6565
mask_token::T # the token that is used for the X0 state
66-
end
66+
end
67+
68+
#An Ornstein–Uhlenbeck flow process whose reversion mean is the target state X1.
struct OUFlow{T} <: Process
    θ::T       # mean-reversion rate
    v_at_0::T  # variance at t = 0
    v_at_1::T  # variance at t = 1
    dec::T     # decay parameter forwarded to the OU bridge — TODO confirm exact semantics
end

#Convenience constructor: default terminal variance 1e-2 and decay -0.1.
#(Fix: the scraped source had `OUFlow::T, v_at_0::T)` — the `(θ` was lost; restored here.)
OUFlow(θ::T, v_at_0::T) where T = OUFlow(θ, v_at_0, T(1e-2), T(-0.1))

0 commit comments

Comments
 (0)