
Commit 776c2a3

change Y input to dictionary with forecast variables as keys
1 parent de925de commit 776c2a3

File tree

8 files changed: +107 −44 lines


src/ApplicationDrivenLearning.jl

Lines changed: 1 addition & 1 deletion
@@ -251,7 +251,7 @@ Train model using given data and options.
 function train!(
     model::Model,
     X::Matrix{<:Real},
-    y::Matrix{<:Real},
+    y::Dict{<:Forecast, <:Vector},
     options::Options,
 )
     if options.mode == NelderMeadMode
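As a quick sketch of the new calling convention (the forecast-variable handle `d`, the data sizes, and `options` below are hypothetical, following the pattern used in the updated tests), the targets are now one `Vector` of observations per forecast variable instead of one `Matrix` column per variable:

X = Float32.(ones(10, 1))
Y = Dict(d => Float32.(rand(10)))   # one series per Forecast key
ApplicationDrivenLearning.train!(model, X, Y, options)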

src/optimizers/bilevel.jl

Lines changed: 11 additions & 14 deletions
@@ -5,7 +5,7 @@ using BilevelJuMP
 function solve_bilevel(
     model::Model,
     X::Matrix{<:Real},
-    Y::Matrix{<:Real},
+    Y::Dict{<:Forecast, <:Vector},
     params::Dict{Symbol,Any},
 )

@@ -23,7 +23,7 @@ function solve_bilevel(
     end

     # parameters
-    T = size(Y, 1)
+    T = size(X, 1)

     # lower model variables
     low_var_map =

@@ -107,22 +107,21 @@ function solve_bilevel(
     @objective(Upper(bilevel_model), post_obj_sense, up_obj)

     # fix upper model observations
-    i_obs_var = 1
-    for obs_var in assess_forecast_vars(model)
+    for obs_var in model.forecast_vars
         @constraint(
             Upper(bilevel_model),
-            up_var_map[obs_var] - Y[1:T, i_obs_var] .== 0
+            up_var_map[obs_var.assess] - Y[obs_var] .== 0
         )
-        i_obs_var += 1
     end

     # implement predictive model expression iterating through
     # models and layers to create predictive expression
     npreds = size(model.forecast.networks, 1)
     predictive_model_vars = [Dict{Int,Any}() for ipred = 1:npreds]
-    y_hat = Matrix{Any}(undef, size(Y, 1), size(Y, 2))
+    # y_hat = Matrix{Any}(undef, size(Y, 1), size(Y, 2))
+    y_hat = VariableIndexedMatrix{Any}(nothing, model.forecast_vars, T)
     for ipred = 1:npreds
-        layers_inpt = Dict{Any,Any}(
+        layers_inpt = Dict{Vector{Forecast},Matrix{Any}}(
             output_idx => X[1:T, input_idx] for (input_idx, output_idx) in
             model.forecast.input_output_map[ipred]
         )

@@ -158,19 +157,17 @@ function solve_bilevel(
             i_layer += 1
         end
         for (output_idx, prediction) in layers_inpt
-            y_hat[:, output_idx] = prediction
+            y_hat[output_idx] = prediction
         end
     end

     # and apply prediction on lower model as constraint
-    ipred_var_count = 1
-    for pred_var in plan_forecast_vars(model)
-        low_pred_var = low_var_map[pred_var]
+    for pred_var in model.forecast_vars
+        low_pred_var = low_var_map[pred_var.plan]
         @constraint(
             Lower(bilevel_model),
-            low_pred_var .- y_hat[:, ipred_var_count] .== 0
+            low_pred_var .- y_hat[pred_var] .== 0
         )
-        ipred_var_count += 1
     end

     # solve model

src/optimizers/gradient.jl

Lines changed: 3 additions & 2 deletions
@@ -5,7 +5,8 @@ Compute assess cost and cost gradient (with respect to predicted values) based
 on incomplete batch of examples.
 """
 function stochastic_compute(model, X, Y, batch, compute_full_cost::Bool)
-    C, dC = compute_cost(model, X[batch, :], Y[batch, :], true)
+    Y_batch = Dict(k => v[batch] for (k, v) in Y)
+    C, dC = compute_cost(model, X[batch, :], Y_batch, true)
     if compute_full_cost
         C = compute_cost(model, X, Y, false)
     end

@@ -24,7 +25,7 @@ end
 function train_with_gradient!(
     model::Model,
     X::Matrix{<:Real},
-    Y::Matrix{<:Real},
+    Y::Dict{<:Forecast, <:Vector},
     params::Dict{Symbol,Any},
 )
     # extract params
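A minimal illustration of the new batching step, with hypothetical forecast handles `d1`, `d2` and toy data: every series in the `Dict` is sliced with the same batch indices before being forwarded to `compute_cost`.

Y = Dict(d1 => [1.0, 2.0, 3.0, 4.0], d2 => [10.0, 20.0, 30.0, 40.0])
batch = [2, 4]
Y_batch = Dict(k => v[batch] for (k, v) in Y)
# Y_batch[d1] == [2.0, 4.0] and Y_batch[d2] == [20.0, 40.0]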

src/optimizers/nelder_mead.jl

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ using Optim
 function train_with_nelder_mead!(
     model::Model,
     X::Matrix{<:Real},
-    Y::Matrix{<:Real},
+    Y::Dict{<:Forecast, <:Vector},
     params::Dict{Symbol,Any},
 )

src/simulation.jl

Lines changed: 28 additions & 18 deletions
@@ -1,10 +1,10 @@
 function compute_single_step_cost(
     model::Model,
-    y::Vector{<:Real},
+    y::VariableIndexedVector,
     yhat::VariableIndexedVector,
 )
     # set forecast params as prediction output
-    MOI.set.(model.plan, POI.ParameterValue(), model.plan_forecast_params, yhat[model.forecast_vars])
+    MOI.set.(model.plan, POI.ParameterValue(), model.plan_forecast_params, yhat[model.forecast_vars].data)
     # optimize plan model
     optimize!(model.plan)
     # check for solution and fix assess policy vars

@@ -18,7 +18,7 @@ function compute_single_step_cost(
         throw(e)
     end
     # fix assess forecast vars on observer values
-    fix.(assess_forecast_vars(model), y; force = true)
+    fix.(model.forecast_vars.assess, y[model.forecast_vars].data; force = true)
     # optimize assess model
     optimize!(model.assess)
     # check for optimization

@@ -38,7 +38,7 @@ Computes the gradient of the cost function (C) with respect to the predictions (
 function compute_single_step_gradient(
     model::Model,
     dCdz::Vector{<:Real},
-    dCdy::Vector{<:Real},
+    dCdy::VariableIndexedVector{<:Real},
 )
     dCdz .= dual.(model.assess[:assess_policy_fix])
     DiffOpt.empty_input_sensitivities!(model.plan)

@@ -51,12 +51,12 @@ function compute_single_step_gradient(
         )
     end
     DiffOpt.reverse_differentiate!(model.plan)
-    for j = 1:size(model.forecast_vars, 1)
-        dCdy[j] =
+    for fv in model.forecast_vars
+        dCdy[fv] =
             MOI.get(
                 model.plan,
                 DiffOpt.ReverseConstraintSet(),
-                ParameterRef(model.plan_forecast_params[j]),
+                ParameterRef(fv.plan),
             ).value
     end

@@ -81,26 +81,36 @@ Compute the cost function (C) based on the model predictions and the true values
 function compute_cost(
     model::Model,
     X::Matrix{<:Real},
-    Y::Matrix{<:Real},
+    Y::Dict{<:Forecast, <:Vector},
     with_gradients::Bool = false,
     aggregate::Bool = true,
 )

     # data size assertions
     @assert size(X)[2] == model.forecast.input_size "Input size mismatch"
-    @assert size(Y)[2] == model.forecast.output_size "Output size mismatch"
+    @assert length(Y) == model.forecast.output_size "Output size mismatch"

     # build model variables if necessary
     build(model)

     # init parameters
-    T = size(Y)[1]
-    C = zeros(T)
-    dC = zeros((T, model.forecast.output_size))
+    T = length.(collect(values(Y)))[1]
+    C = Vector{Float32}(undef, T)
     dCdz = Vector{Float32}(undef, size(model.policy_vars, 1))
-    dCdy = Vector{Float32}(undef, model.forecast.output_size)
+    dCdy = VariableIndexedVector{Float32}(undef, model.forecast_vars)
+    dC = VariableIndexedMatrix{Float32}(undef, model.forecast_vars, T)

-    function _compute_step(y::Vector{<:Real}, yhat::VariableIndexedVector)
+    function _get_index_y(Y::Dict{<:Forecast, <:Vector}, idx::Int)
+        var_index = Vector{Forecast}(undef, model.forecast.output_size)
+        y_values = Vector{Real}(undef, model.forecast.output_size)
+        for (i, (fvar, vals)) in enumerate(Y)
+            var_index[i] = fvar
+            y_values[i] = vals[idx]
+        end
+        return VariableIndexedVector(y_values, var_index)
+    end
+
+    function _compute_step(y::VariableIndexedVector, yhat::VariableIndexedVector)
         c = compute_single_step_cost(model, y, yhat)
         if with_gradients
             dc = compute_single_step_gradient(model, dCdz, dCdy)

@@ -114,15 +124,15 @@ function compute_cost(

     # main loop to compute cost
     for t = 1:T
-        result = _compute_step(Y[t, :], Yhat[t])
-        C[t] += result[1]
-        dC[t, :] .+= result[2]
+        result = _compute_step(_get_index_y(Y, t), Yhat[t])
+        C[t] = result[1]
+        dC[t] = result[2]
     end

     # aggregate cost if requested
     if aggregate
         C = sum(C) / T
-        dC = sum(dC, dims = 1)[1, :] / T
+        dC = sum(dC) / T
     end

     if with_gradients
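Conceptually, the inner `_get_index_y` helper replaces the old `Y[t, :]` row slice. For a hypothetical pair of forecast handles `d1`, `d2` with toy data

Y = Dict(d1 => [1.0, 2.0], d2 => [10.0, 20.0])

`_get_index_y(Y, 2)` builds a `VariableIndexedVector` pairing `d1` with 2.0 and `d2` with 20.0, i.e. the t = 2 observation of every forecast series, which is then passed to `_compute_step`.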

src/variable_indexed_structs.jl

Lines changed: 55 additions & 1 deletion
@@ -1,4 +1,5 @@
 import LinearAlgebra
+import Base./

 """
     VariableIndexedVector(data::Vector{T}, index::Vector{<:Forecast})

@@ -34,6 +35,9 @@ Base.length(v::VariableIndexedVector) = length(v.data)
 Base.getindex(v::VariableIndexedVector, i::Int) = v.data[i]
 Base.setindex!(v::VariableIndexedVector, val, i::Int) = (v.data[i] = val)

+# define Base right divide function (v / 2)
+/(v::VariableIndexedVector, i::Number) = VariableIndexedVector(v.data / i, v.index)
+
 # define dot product of two VariableIndexedVectors
 function LinearAlgebra.dot(v1::VariableIndexedVector, v2::VariableIndexedVector)
     @assert length(v1) == length(v2) "Vectors must have the same length"

@@ -98,6 +102,23 @@ struct VariableIndexedMatrix{T} <: AbstractMatrix{T}
         @assert length(unique(row_index)) == length(row_index) "Variables must be unique"
         new{T}(data, row_index)
     end
+
+    function VariableIndexedMatrix{T}(::UndefInitializer, index::Vector{<:Forecast}, n::Real) where T
+        return new{T}(Matrix{T}(undef, length(index), n), index)
+    end
+
+    function VariableIndexedMatrix{T}(::Nothing, index::Vector{<:Forecast}, n::Real) where T
+        return new{T}(Matrix{T}(nothing, length(index), n), index)
+    end
+end
+
+# helper to find index of a Forecast variable
+function _get_idx(m::VariableIndexedMatrix, var::Forecast)
+    i = findfirst(isequal(var), m.row_index)
+    if isnothing(i)
+        throw(KeyError(var))
+    end
+    return i
 end

 Base.size(m::VariableIndexedMatrix) = size(m.data)

@@ -106,11 +127,44 @@ Base.size(m::VariableIndexedMatrix) = size(m.data)
 Base.getindex(m::VariableIndexedMatrix, i::Int, j::Int) = m.data[i, j]
 Base.setindex!(m::VariableIndexedMatrix, val, i::Int, j::Int) = (m.data[i, j] = val)

-# column lookup (get column 2) {M[:, 2]}
+# column lookup (get column 2) {M[2]}
 function Base.getindex(m::VariableIndexedMatrix, c::Int)
     return VariableIndexedVector(m.data[:, c], m.row_index)
 end

+# row lookup (get values from variable) {M[forecast_var]}
+function Base.getindex(m::VariableIndexedMatrix, var::Forecast)
+    return m.data[_get_idx(m, var), :]
+end
+
+# multi-row lookup {M[[f_var_1, f_var_2]]}
+function Base.getindex(m::VariableIndexedMatrix, vars::Vector{<:Forecast})
+    return VariableIndexedMatrix(m.data[[_get_idx(m, var) for var in vars], :], vars)
+end
+
+# set column 2 values for all var indices {M[2] = [1,2,3]}
+function Base.setindex!(m::VariableIndexedMatrix, values::VariableIndexedVector, c::Int)
+    m.data[:, c] = values[m.row_index]
+end
+
+# set values for a single var index {M[forecast_var] = [1,2,3]}
+function Base.setindex!(m::VariableIndexedMatrix, values::Vector, var::Forecast)
+    m.data[_get_idx(m, var), :] = values
+end
+
+# set values for a subset of var indices {M[[f_var_1, f_var_2]] = [[1 2 3]; [4 5 6]]}
+function Base.setindex!(m::VariableIndexedMatrix, values::Matrix, vars::Vector{<:Forecast})
+    m.data[[_get_idx(m, var) for var in vars], :] = values
+end
+
+# define sum of matrix by summing all values for each variable
+function Base.sum(m::VariableIndexedMatrix)
+    return VariableIndexedVector(sum(m.data, dims=2)[:, 1], m.row_index)
+end
+
+# define Base right divide function (M / 2)
+/(m::ApplicationDrivenLearning.VariableIndexedMatrix, i::Number) = ApplicationDrivenLearning.VariableIndexedMatrix(m.data / i, m.row_index)
+
 # define dot product of a VariableIndexedVectors and a VariableIndexedMatrix
 function LinearAlgebra.dot(v1::VariableIndexedVector, m2::VariableIndexedMatrix)
     @assert length(v1) == size(m2, 1) "Vector must have the same length as the number of rows in matrix"
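To make the new indexing surface concrete, a small sketch with hypothetical `Forecast` handles `f1`, `f2` and a horizon of 3 (values are made up):

fvars = [f1, f2]
M = VariableIndexedMatrix{Float64}(undef, fvars, 3)
M[f1] = [1.0, 2.0, 3.0]   # row write keyed by forecast variable
M[f2] = [4.0, 5.0, 6.0]
col = M[2]                # column read -> VariableIndexedVector over fvars
row = M[f1]               # row read -> plain Vector [1.0, 2.0, 3.0]
avg = sum(M) / 3          # per-variable sums, then the new right-divide overload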

test/test_gradient.jl

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,4 @@
 # basic model for testing gradient mode
-X = Float32.(ones(1, 1))
-Y = Float32.(ones(1, 1))
 model = ApplicationDrivenLearning.Model()
 @variables(model, begin
     x >= 0, ApplicationDrivenLearning.Policy

@@ -11,6 +9,8 @@ end)
 set_optimizer(model, HiGHS.Optimizer)
 set_silent(model)
 ApplicationDrivenLearning.set_forecast_model(model, Chain(Dense(1 => 1)))
+X = Float32.(ones(1, 1))
+Y = Dict(d => Float32.(ones(1)))

 @testset "GradientMode Stop Rules" begin
     # epochs

test/test_newsvendor.jl

Lines changed: 6 additions & 5 deletions
@@ -1,10 +1,6 @@
 c = 5.0
 q = 9.0
 r = 4.0
-X = ones(1, 1)
-Y = 50 * ones(1, 1)
-best_decision = y = Y[1, 1]
-best_cost = (c - q) * y

 model = ApplicationDrivenLearning.Model()
 @variables(model, begin

@@ -44,6 +40,11 @@ set_optimizer(model, HiGHS.Optimizer)
 set_silent(model)
 nn = Chain(Dense(1 => 1; bias = false, init = (size...) -> rand(size...)))

+X = ones(1, 1)
+Y = Dict(d => [50.0])
+best_decision = y = Y[d][1]
+best_cost = (c - q) * y
+
 @testset "Newsvendor BilevelMode" begin
     ApplicationDrivenLearning.set_forecast_model(
         model,

@@ -87,7 +88,7 @@ end
 opt = ApplicationDrivenLearning.Options(
     ApplicationDrivenLearning.GradientMode;
     rule = Flux.Adam(1.0),
-    epochs = 150,
+    epochs = 200,
     batch_size = -1,
     verbose = false,
 )
