Skip to content

Commit e8be496

Browse files
committed
Dynamic assortment is in a good state; fix docs; working on DVSP
1 parent a90a3d5 commit e8be496

File tree

13 files changed

+255
-170
lines changed

13 files changed

+255
-170
lines changed

Project.toml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@ authors = ["Members of JuliaDecisionFocusedLearning"]
44
version = "0.2.4"
55

66
[deps]
7-
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
8-
CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98"
97
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
8+
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
109
ConstrainedShortestPaths = "b3798467-87dc-4d99-943d-35a1bd39e395"
1110
DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
1211
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -35,9 +34,8 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
3534
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
3635

3736
[compat]
38-
Combinatorics = "1.0.3"
39-
CommonRLInterface = "0.3.3"
4037
Colors = "0.13.1"
38+
Combinatorics = "1.0.3"
4139
ConstrainedShortestPaths = "0.6.0"
4240
DataDeps = "0.7"
4341
Distributions = "0.25"

docs/src/api/dynamic_assorment.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Dynamic Assortment
2+
3+
## Public
4+
5+
```@autodocs
6+
Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment]
7+
Private = false
8+
```
9+
10+
## Private
11+
12+
```@autodocs
13+
Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment]
14+
Public = false
15+
```
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Dynamic Assortment
2+
3+
[`DynamicAssortmentBenchmark`](@ref).

src/DecisionFocusedLearningBenchmarks.jl

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -59,23 +59,16 @@ include("DynamicVehicleScheduling/DynamicVehicleScheduling.jl")
5959
include("DynamicAssortment/DynamicAssortment.jl")
6060

6161
using .Utils
62-
using .Argmax
63-
using .Argmax2D
64-
using .Ranking
65-
using .SubsetSelection
66-
using .Warcraft
67-
using .FixedSizeShortestPath
68-
using .PortfolioOptimization
69-
using .StochasticVehicleScheduling
70-
using .DynamicVehicleScheduling
71-
using .DynamicAssortment
7262

7363
# Interface
7464
export AbstractBenchmark, AbstractStochasticBenchmark, AbstractDynamicBenchmark, DataSample
65+
export AbstractEnv, get_seed, is_terminated, observe, reset!, step!
66+
67+
export Policy, run_policy!
7568

7669
export generate_sample, generate_dataset, generate_environments, generate_environment
7770
export generate_scenario
78-
export generate_scenario_generator, generate_anticipative_solver
71+
export generate_policies
7972
export generate_statistical_model
8073
export generate_maximizer, maximizer_kwargs
8174
export generate_anticipative_solution
@@ -86,15 +79,26 @@ export plot_data, plot_instance, plot_solution
8679
export compute_gap
8780

8881
# Export all benchmarks
89-
export ArgmaxBenchmark
82+
using .Argmax
83+
using .Argmax2D
84+
using .Ranking
85+
using .SubsetSelection
86+
using .Warcraft
87+
using .FixedSizeShortestPath
88+
using .PortfolioOptimization
89+
using .StochasticVehicleScheduling
90+
using .DynamicVehicleScheduling
91+
using .DynamicAssortment
92+
9093
export Argmax2DBenchmark
91-
export RankingBenchmark
92-
export SubsetSelectionBenchmark
93-
export WarcraftBenchmark
94+
export ArgmaxBenchmark
95+
export DynamicAssortmentBenchmark
96+
export DynamicVehicleSchedulingBenchmark
9497
export FixedSizeShortestPathBenchmark
9598
export PortfolioOptimizationBenchmark
99+
export RankingBenchmark
96100
export StochasticVehicleSchedulingBenchmark
97-
export DynamicVehicleSchedulingBenchmark
98-
export DynamicAssortmentBenchmark
101+
export SubsetSelectionBenchmark
102+
export WarcraftBenchmark
99103

100104
end # module DecisionFocusedLearningBenchmarks

src/DynamicAssortment/DynamicAssortment.jl

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ module DynamicAssortment
22

33
using ..Utils
44

5-
using CommonRLInterface: CommonRLInterface, AbstractEnv
65
using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
76
using Distributions: Uniform, Categorical
87
using LinearAlgebra: dot
@@ -62,17 +61,35 @@ function Utils.generate_sample(
6261
return DataSample(; instance=Instance(b, rng))
6362
end
6463

64+
function Utils.generate_statistical_model(b::DynamicAssortmentBenchmark; seed=nothing)
65+
Random.seed!(seed)
66+
d = feature_count(b)
67+
return Chain(Dense(d + 8 => 5), Dense(5 => 1), vec)
68+
end
69+
6570
function Utils.generate_maximizer(b::DynamicAssortmentBenchmark)
6671
return TopKMaximizer(assortment_size(b))
6772
end
6873

6974
function Utils.generate_environment(
70-
::DynamicAssortmentBenchmark,
71-
instance::Instance;
72-
seed=nothing,
73-
rng::AbstractRNG=MersenneTwister(seed),
75+
::DynamicAssortmentBenchmark, instance::Instance, rng::AbstractRNG
7476
)
75-
return Environment(instance; seed=seed, rng=rng)
77+
seed = rand(rng, 1:typemax(Int))
78+
return Environment(instance; seed)
79+
end
80+
81+
function Utils.generate_policies(b::DynamicAssortmentBenchmark)
82+
greedy = Policy(
83+
"Greedy",
84+
"policy that selects the assortment with items with the highest prices",
85+
greedy_policy,
86+
)
87+
expert = Policy(
88+
"Expert",
89+
"policy that selects the assortment with the highest expected revenue",
90+
expert_policy,
91+
)
92+
return (expert, greedy)
7693
end
7794

7895
export DynamicAssortmentBenchmark

src/DynamicAssortment/environment.jl

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Environment for the dynamic assortment problem.
77
$TYPEDFIELDS
88
"""
99
@kwdef mutable struct Environment{I<:Instance,R<:AbstractRNG,S<:Union{Nothing,Int}} <:
10-
AbstractEnv
10+
Utils.AbstractEnvironment
1111
"associated instance"
1212
instance::I
1313
"current step"
@@ -43,23 +43,22 @@ function Environment(instance::Instance; seed=0, rng::AbstractRNG=MersenneTwiste
4343
features=full_features,
4444
d_features=zeros(2, N),
4545
)
46-
CommonRLInterface.reset!(env; reset_seed=true)
46+
Utils.reset!(env; reset_seed=true)
4747
return env
4848
end
4949

50+
Utils.get_seed(env::Environment) = env.seed
5051
customer_choice_model(b::Environment) = customer_choice_model(b.instance)
5152
item_count(b::Environment) = item_count(b.instance)
5253
feature_count(b::Environment) = feature_count(b.instance)
5354
assortment_size(b::Environment) = assortment_size(b.instance)
5455
max_steps(b::Environment) = max_steps(b.instance)
5556
prices(b::Environment) = b.instance.prices
56-
# features(b::Environment) = b.instance.features
57-
# starting_hype_and_saturation(b::Environment) = b.instance.starting_hype_and_saturation
5857

5958
## Basic operations of environment
6059

6160
# Reset the environment
62-
function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.seed)
61+
function Utils.reset!(env::Environment; reset_seed=false, seed=env.seed)
6362
reset_seed && Random.seed!(env.rng, seed)
6463

6564
env.step = 1
@@ -79,18 +78,19 @@ function CommonRLInterface.reset!(env::Environment; reset_seed=false, seed=env.s
7978
return nothing
8079
end
8180

82-
function CommonRLInterface.terminated(env::Environment)
81+
function Utils.is_terminated(env::Environment)
8382
return env.step > max_steps(env)
8483
end
8584

86-
function CommonRLInterface.observe(env::Environment)
85+
function Utils.observe(env::Environment)
8786
delta_features = env.features[2:3, :] .- env.instance.starting_hype_and_saturation
8887
return vcat(
8988
env.features,
9089
env.d_features,
9190
delta_features,
9291
ones(1, item_count(env)) .* (env.step / max_steps(env) * 10),
93-
) #./ 10
92+
) ./ 10,
93+
nothing
9494
end
9595

9696
# Compute the hype vector
@@ -149,9 +149,10 @@ function choice_probabilities(env::Environment, S)
149149
end
150150

151151
# Purchase decision
152-
function CommonRLInterface.act!(env::Environment, S)
152+
function Utils.step!(env::Environment, assortment)
153+
@assert !Utils.is_terminated(env) "Environment is terminated, cannot act!"
153154
r = prices(env)
154-
probs = choice_probabilities(env, S)
155+
probs = choice_probabilities(env, assortment)
155156
item = rand(env.rng, Categorical(probs))
156157
reward = r[item]
157158
buy_item!(env, item)

src/DynamicVehicleScheduling/DynamicVehicleScheduling.jl

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ module DynamicVehicleScheduling
33
using ..Utils
44

55
using Base: @kwdef
6-
using CommonRLInterface: CommonRLInterface, AbstractEnv, reset!, terminated, observe, act!
76
using DataDeps: @datadep_str
87
using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
98
using Graphs
@@ -39,11 +38,11 @@ include("algorithms/anticipative_solver.jl")
3938
include("learning/features.jl")
4039
include("learning/2d_features.jl")
4140

42-
include("policy/abstract_vsp_policy.jl")
43-
include("policy/greedy_policy.jl")
44-
include("policy/lazy_policy.jl")
45-
include("policy/anticipative_policy.jl")
46-
include("policy/kleopatra_policy.jl")
41+
# include("policy/abstract_vsp_policy.jl")
42+
# include("policy/greedy_policy.jl")
43+
# include("policy/lazy_policy.jl")
44+
# include("policy/anticipative_policy.jl")
45+
# include("policy/kleopatra_policy.jl")
4746

4847
include("maximizer.jl")
4948

@@ -56,13 +55,13 @@ Abstract type for dynamic vehicle scheduling benchmarks.
5655
$TYPEDFIELDS
5756
"""
5857
@kwdef struct DynamicVehicleSchedulingBenchmark <: AbstractDynamicBenchmark{true}
59-
"todo"
58+
"maximum number of customers entering the system per epoch"
6059
max_requests_per_epoch::Int = 10
61-
"todo"
60+
"time between decision and dispatch of a vehicle"
6261
Δ_dispatch::Float64 = 1.0
63-
"todo"
62+
"duration of an epoch"
6463
epoch_duration::Float64 = 1.0
65-
"todo"
64+
"whether to use two-dimensional features"
6665
two_dimensional_features::Bool = false
6766
end
6867

@@ -83,9 +82,10 @@ function Utils.generate_dataset(b::DynamicVehicleSchedulingBenchmark, dataset_si
8382
end
8483

8584
function Utils.generate_environment(
86-
::DynamicVehicleSchedulingBenchmark, instance::Instance; kwargs...
85+
::DynamicVehicleSchedulingBenchmark, instance::Instance, rng::AbstractRNG
8786
)
88-
return DVSPEnv(instance; kwargs...)
87+
seed = rand(rng, 1:typemax(Int))
88+
return DVSPEnv(instance; seed)
8989
end
9090

9191
function Utils.generate_maximizer(::DynamicVehicleSchedulingBenchmark)
@@ -105,7 +105,5 @@ function Utils.generate_anticipative_solution(
105105
end
106106

107107
export DynamicVehicleSchedulingBenchmark
108-
export run_policy!,
109-
GreedyVSPPolicy, LazyVSPPolicy, KleopatraVSPPolicy, AnticipativeVSPPolicy
110108

111109
end

src/DynamicVehicleScheduling/algorithms/prize_collecting_vsp.jl

Lines changed: 0 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -126,90 +126,3 @@ function prize_collecting_vsp(
126126

127127
return retrieve_routes(value.(y), graph)
128128
end
129-
130-
# # ?
131-
# function prize_collecting_vsp_Q(
132-
# θ::AbstractVector,
133-
# vals::AbstractVector;
134-
# instance::DVSPState,
135-
# model_builder=highs_model,
136-
# kwargs...,
137-
# )
138-
# (; duration) = instance.instance
139-
# graph = create_graph(instance)
140-
# model = model_builder()
141-
# set_silent(model)
142-
# nb_nodes = nv(graph)
143-
# job_indices = 2:(nb_nodes)
144-
# @variable(model, y[i=1:nb_nodes, j=1:nb_nodes; has_edge(graph, i, j)] >= 0)
145-
# θ_ext = fill(0.0, location_count(instance.instance)) # no prize for must dispatch requests, only hard constraints
146-
# θ_ext[instance.is_postponable] .= θ
147-
# # v_ext = fill(0.0, nb_locations(instance.instance)) # no prize for must dispatch requests, only hard constraints
148-
# # v_ext[instance.is_postponable] .= vals
149-
# @objective(
150-
# model,
151-
# Max,
152-
# sum(
153-
# (θ_ext[dst(edge)] + vals[dst(edge)] - duration[src(edge), dst(edge)]) *
154-
# y[src(edge), dst(edge)] for edge in edges(graph)
155-
# )
156-
# )
157-
# @constraint(
158-
# model,
159-
# flow[i in 2:nb_nodes],
160-
# sum(y[j, i] for j in inneighbors(graph, i)) ==
161-
# sum(y[i, j] for j in outneighbors(graph, i))
162-
# )
163-
# @constraint(
164-
# model, demand[i in job_indices], sum(y[j, i] for j in inneighbors(graph, i)) <= 1
165-
# )
166-
# # must dispatch constraints
167-
# @constraint(
168-
# model,
169-
# demand_must_dispatch[i in job_indices; instance.is_must_dispatch[i]],
170-
# sum(y[j, i] for j in inneighbors(graph, i)) == 1
171-
# )
172-
# optimize!(model)
173-
# return retrieve_routes(value.(y), graph)
174-
# end
175-
176-
# function my_objective_value(θ, routes; instance)
177-
# (; duration) = instance.instance
178-
# total = 0.0
179-
# θ_ext = fill(0.0, location_count(instance))
180-
# θ_ext[instance.is_postponable] .= θ
181-
# for route in routes
182-
# for (u, v) in partition(vcat(1, route), 2, 1)
183-
# total += θ_ext[v] - duration[u, v]
184-
# end
185-
# end
186-
# return -total
187-
# end
188-
189-
# function _objective_value(θ, routes; instance)
190-
# (; duration) = instance.instance
191-
# total = 0.0
192-
# θ_ext = fill(0.0, location_count(instance))
193-
# θ_ext[instance.is_postponable] .= θ
194-
# mapping = cumsum(instance.is_postponable)
195-
# g = falses(length(θ))
196-
# for route in routes
197-
# for (u, v) in partition(vcat(1, route), 2, 1)
198-
# total -= duration[u, v]
199-
# if instance.is_postponable[v]
200-
# total += θ_ext[v]
201-
# g[mapping[v]] = 1
202-
# end
203-
# end
204-
# end
205-
# return -total, g
206-
# end
207-
208-
# function ChainRulesCore.rrule(::typeof(my_objective_value), θ, routes; instance)
209-
# total, g = _objective_value(θ, routes; instance)
210-
# function pullback(dy)
211-
# g = g .* dy
212-
# return NoTangent(), g, NoTangent()
213-
# end
214-
# return total, pullback
215-
# end

0 commit comments

Comments (0)