Improve anticipative policy: epoch window and 2D feature support

BatyLeo · BatyLeo · commit ed01c451f0d6 · 2025-08-08T17:05:16.000+02:00
diff --git a/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl b/src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl
@@ -67,7 +67,7 @@ $TYPEDFIELDS
 end
 
 function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_size::Int=1)
-    (; max_requests_per_epoch, Δ_dispatch, epoch_duration) = b
+    (; max_requests_per_epoch, Δ_dispatch, epoch_duration, two_dimensional_features) = b
     files = readdir(datadep"dvrptw"; join=true)
     dataset_size = min(dataset_size, length(files))
     return [
@@ -77,6 +77,7 @@ function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_si
                 max_requests_per_epoch,
                 Δ_dispatch,
                 epoch_duration,
+                two_dimensional_features,
             ),
         ) for i in 1:dataset_size
     ]
diff --git a/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl b/src/DynamicVehicleScheduling/algorithms/anticipative_solver.jl
@@ -4,15 +4,17 @@ $TYPEDSIGNATURES
 Retrieve anticipative routes solution from the given MIP solution `y`.
 Outputs a set of routes per epoch.
 """
-function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv, customer_index)
+function retrieve_routes_anticipative(
+    y::AbstractArray, dvspenv::DVSPEnv, customer_index, epoch_indices
+)
     nb_tasks = length(customer_index)
-    first_epoch = 1
-    (; last_epoch) = dvspenv.instance
+    # first_epoch = 1
+    # (; last_epoch) = dvspenv.instance
     job_indices = 2:(nb_tasks)
-    epoch_indices = first_epoch:last_epoch
+    # epoch_indices = first_epoch:last_epoch
 
     routes = [Vector{Int}[] for _ in epoch_indices]
-    for t in epoch_indices
+    for (i, t) in enumerate(epoch_indices)
         start = [i for i in job_indices if y[1, i, t] ≈ 1]
         for task in start
             route = Int[]
@@ -28,7 +30,7 @@ function retrieve_routes_anticipative(y::AbstractArray, dvspenv::DVSPEnv, custom
                 end
                 current_task = next_task
             end
-            push!(routes[t], route)
+            push!(routes[i], route)
         end
     end
     return routes
@@ -44,28 +46,33 @@ function anticipative_solver(
     env::DVSPEnv,
     scenario=env.scenario;
     model_builder=highs_model,
-    reset_env=false,
-    two_dimensional_features=false,
+    two_dimensional_features=env.instance.two_dimensional_features,
+    reset_env=true,
+    nb_epochs=typemax(Int),
 )
-    reset_env && reset!(env)
+    reset_env && reset!(env; reset_seed=true)
+
+    start_epoch = current_epoch(env)
+    end_epoch = min(last_epoch(env), start_epoch + nb_epochs - 1)
+    T = start_epoch:end_epoch
+
     request_epoch = [0]
-    for (epoch, indices) in enumerate(scenario.indices)
-        request_epoch = vcat(request_epoch, fill(epoch, length(indices)))
+    for t in T
+        request_epoch = vcat(request_epoch, fill(t, length(scenario.indices[t])))
     end
-    customer_index = vcat(1, scenario.indices...)
-    service_time = vcat(0.0, scenario.service_time...)
-    start_time = vcat(0.0, scenario.start_time...)
+    customer_index = vcat(1, scenario.indices[T]...)
+    service_time = vcat(0.0, scenario.service_time[T]...)
+    start_time = vcat(0.0, scenario.start_time[T]...)
 
     duration = env.instance.static_instance.duration[customer_index, customer_index]
-    first_epoch = 1
-    (; last_epoch, epoch_duration, Δ_dispatch) = env.instance
+    (; epoch_duration, Δ_dispatch) = env.instance
 
     model = model_builder()
     set_silent(model)
 
     nb_nodes = length(customer_index)
     job_indices = 2:nb_nodes
-    epoch_indices = first_epoch:last_epoch
+    epoch_indices = T#first_epoch:last_epoch
 
     @variable(model, y[i=1:nb_nodes, j=1:nb_nodes, t=epoch_indices]; binary=true)
 
@@ -102,7 +109,7 @@ function anticipative_solver(
 
     # a trip from i can be done only before limit date
     for i in job_indices, t in epoch_indices, j in 1:nb_nodes
-        if (t - 1) * epoch_duration + duration[1, i] + Δ_dispatch > start_time[i]  # ! this only works if first_epoch = 1
+        if (t - 1) * epoch_duration + duration[1, i] + Δ_dispatch > start_time[i]
             @constraint(model, y[i, j, t] <= 0)
         end
     end
@@ -121,27 +128,32 @@ function anticipative_solver(
     optimize!(model)
 
     obj = JuMP.objective_value(model)
-    epoch_routes = retrieve_routes_anticipative(value.(y), env, customer_index)
+    epoch_routes = retrieve_routes_anticipative(
+        value.(y), env, customer_index, epoch_indices
+    )
 
     epoch_indices = Vector{Int}[]
     N = 1
     indices = [1]
-    for epoch in 1:last_epoch
+    index = 1
+    for epoch in 1:last_epoch(env)
         M = length(scenario.indices[epoch])
         indices = vcat(indices, (N + 1):(N + M))
         push!(epoch_indices, copy(indices))
         N = N + M
-        epoch_routes[epoch]
-        dispatched = vcat(epoch_routes[epoch]...)
-        indices = setdiff(indices, dispatched)
+        if epoch in T
+            dispatched = vcat(epoch_routes[index]...)
+            index += 1
+            indices = setdiff(indices, dispatched)
+        end
     end
 
     indices = vcat(1, scenario.indices...)
     start_time = vcat(0.0, scenario.start_time...)
     service_time = vcat(0.0, scenario.service_time...)
 
-    dataset = map(1:last_epoch) do epoch
-        routes = epoch_routes[epoch]
+    dataset = map(enumerate(T)) do (i, epoch)
+        routes = epoch_routes[i]
         epoch_customers = epoch_indices[epoch]
 
         y_true =
@@ -170,9 +182,13 @@ function anticipative_solver(
         epoch_duration = env.instance.epoch_duration
         Δ_dispatch = env.instance.Δ_dispatch
         planning_start_time = (epoch - 1) * epoch_duration + Δ_dispatch
-        is_must_dispatch[2:end] .=
-            planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .>
-            new_start_time[2:end]
+        if epoch == last_epoch(env)
+            # Last epoch (`last_epoch` is a function and must be called): every request must be dispatched
+            is_must_dispatch[2:end] .= true
+        else
+            is_must_dispatch[2:end] .=
+                planning_start_time .+ epoch_duration .+ @view(new_duration[1, 2:end]) .> new_start_time[2:end]
+        end
         is_postponable[2:end] .= .!is_must_dispatch[2:end]
 
         state = DVSPState(;
@@ -183,7 +199,6 @@ function anticipative_solver(
             current_epoch=epoch,
         )
 
-        # x = compute_2D_features(state, env.instance)
         x = if two_dimensional_features
             compute_2D_features(state, env.instance)
         else
@@ -195,17 +210,3 @@ function anticipative_solver(
 
     return obj, dataset
 end
-
-# @kwdef struct AnticipativeSolver
-#     is_2D::Bool = false
-# end
-
-# function (solver::AnticipativeSolver)(env::DVSPEnv, scenario=env.scenario; reset_env=false)
-#     return generate_anticipative_decision(
-#         env,
-#         scenario;
-#         model_builder=highs_model,
-#         reset_env,
-#         two_dimensional_features=solver.is_2D,
-#     )
-# end
diff --git a/src/DynamicVehicleScheduling/environment/environment.jl b/src/DynamicVehicleScheduling/environment/environment.jl
@@ -35,7 +35,13 @@ $TYPEDSIGNATURES
 
 Get the current state of the environment.
 """
-Utils.observe(env::DVSPEnv) = nothing, env.state
+function Utils.observe(env::DVSPEnv)
+    (; instance, state) = env
+    # The instance flag decides which feature extractor describes the state.
+    extractor =
+        instance.two_dimensional_features ? compute_2D_features : compute_features
+    return extractor(state, instance), state
+end
 
 current_epoch(env::DVSPEnv) = current_epoch(env.state)
 
diff --git a/src/DynamicVehicleScheduling/environment/instance.jl b/src/DynamicVehicleScheduling/environment/instance.jl
@@ -14,13 +14,16 @@ Instance data structure for the dynamic vehicle scheduling problem.
     epoch_duration::T = 1.0
     "last epoch index"
     last_epoch::Int
+    "whether to use two-dimensional features"
+    two_dimensional_features::Bool = false
 end
 
 function Instance(
     static_instance::StaticInstance;
     max_requests_per_epoch::Int=10,
     Δ_dispatch::Float64=1.0,
     epoch_duration::Float64=1.0,
+    two_dimensional_features::Bool=false,
 )
     last_epoch = trunc(
         Int,
@@ -35,6 +38,7 @@ function Instance(
         Δ_dispatch=Δ_dispatch,
         epoch_duration=epoch_duration,
         last_epoch=last_epoch,
+        two_dimensional_features=two_dimensional_features,
     )
 end
 
diff --git a/src/DynamicVehicleScheduling/environment/state.jl b/src/DynamicVehicleScheduling/environment/state.jl
@@ -16,6 +16,25 @@ State data structure for the Dynamic Vehicle Scheduling Problem.
     is_postponable::BitVector = falses(0)
 end
 
+function Base.show(io::IO, state::DVSPState)
+    # One-line rendering of the epoch, the location indices and the two
+    # dispatch masks, written to `io` piece by piece.
+    print(io, "DVSPState(")
+
+    print(io, "current_epoch=", state.current_epoch)
+    print(io, ", ")
+
+    print(io, "location_indices=", state.location_indices)
+    print(io, ", ")
+
+    print(io, "is_must_dispatch=", state.is_must_dispatch)
+    print(io, ", ")
+
+    print(io, "is_postponable=", state.is_postponable)
+    print(io, ")")
+    return nothing
+end
+
 function reset_state!(
     state::DVSPState, instance::Instance; indices, service_time, start_time
 )
@@ -189,9 +208,14 @@ function add_new_customers!(
     epoch_duration = instance.epoch_duration
     Δ_dispatch = instance.Δ_dispatch
     planning_start_time = (state.current_epoch - 1) * epoch_duration + Δ_dispatch
-    is_must_dispatch[2:end] .=
-        planning_start_time .+ epoch_duration .+ @view(updated_duration[1, 2:end]) .>
-        updated_start_time[2:end]
+    if state.current_epoch == last_epoch(instance)
+        # If we are in the last epoch, all requests must be dispatched
+        is_must_dispatch[2:end] .= true
+    else
+        is_must_dispatch[2:end] .=
+            planning_start_time .+ epoch_duration .+ @view(updated_duration[1, 2:end]) .>
+            updated_start_time[2:end]
+    end
     is_postponable[2:end] .= .!is_must_dispatch[2:end]
 
     state.is_must_dispatch = is_must_dispatch
diff --git a/src/DynamicVehicleScheduling/policy.jl b/src/DynamicVehicleScheduling/policy.jl
@@ -4,6 +4,7 @@ function greedy_policy(env::DVSPEnv; model_builder=highs_model)
     nb_postponable_requests = sum(is_postponable)
     θ = ones(nb_postponable_requests) * 1e9
     routes = prize_collecting_vsp(θ; instance=state, model_builder)
+    @assert is_feasible(state, routes)
     return routes
 end
 
@@ -12,6 +13,7 @@ function lazy_policy(env::DVSPEnv; model_builder=highs_model)
     nb_postponable_requests = sum(state.is_postponable)
     θ = ones(nb_postponable_requests) * -1e9
     routes = prize_collecting_vsp(θ; instance=state, model_builder)
+    @assert is_feasible(state, routes)
     return routes
 end
 
diff --git a/src/Utils/data_sample.jl b/src/Utils/data_sample.jl
@@ -22,6 +22,23 @@ $TYPEDFIELDS
     instance::I = nothing
 end
 
+function Base.show(io::IO, d::DataSample)
+    # Collect one "name=value" entry per field that is set, in a fixed order;
+    # fields equal to `nothing` are left out of the printed summary.
+    parts = String[]
+    isnothing(d.x) || push!(parts, "x=$(d.x)")
+    isnothing(d.θ_true) || push!(parts, "θ_true=$(d.θ_true)")
+    isnothing(d.y_true) || push!(parts, "y_true=$(d.y_true)")
+    isnothing(d.instance) || push!(parts, "instance=$(d.instance)")
+
+    # Entries are separated by ", " and wrapped in "DataSample(...)".
+    body = join(parts, ", ")
+    print(io, "DataSample(")
+    print(io, body)
+    print(io, ")")
+    return nothing
+end
+
 """
 $TYPEDSIGNATURES
 
diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl
@@ -39,9 +39,12 @@ function run_policy!(policy, env::AbstractEnvironment; kwargs...)
         y = policy(env; kwargs...)
         features, state = observe(env)
         if @isdefined labeled_dataset
-            push!(labeled_dataset, DataSample(; x=features, y_true=y, instance=state))
+            push!(
+                labeled_dataset,
+                DataSample(; x=features, y_true=y, instance=deepcopy(state)),
+            )
         else
-            labeled_dataset = [DataSample(; x=features, y_true=y, instance=state)]
+            labeled_dataset = [DataSample(; x=features, y_true=y, instance=deepcopy(state))]
         end
         reward = step!(env, y)
         total_reward += reward