JuliaDecisionFocusedLearning
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 12 additions & 1 deletion b/‎README.md‎
Lines changed: 12 additions & 1 deletion
diff --git a/‎docs/src/api/argmax.md‎
Lines changed: 15 additions & 0 deletions b/‎docs/src/api/argmax.md‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎docs/src/api/ranking.md‎
Lines changed: 15 additions & 0 deletions b/‎docs/src/api/ranking.md‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎docs/src/benchmarks/argmax.md‎ b/‎docs/src/benchmarks/argmax.md‎
diff --git a/‎docs/src/benchmarks/ranking.md‎ b/‎docs/src/benchmarks/ranking.md‎
diff --git a/‎docs/src/tutorials/warcraft.jl‎
Lines changed: 7 additions & 7 deletions b/‎docs/src/tutorials/warcraft.jl‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎src/Argmax/Argmax.jl‎
Lines changed: 86 additions & 0 deletions b/‎src/Argmax/Argmax.jl‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎src/DecisionFocusedLearningBenchmarks.jl‎
Lines changed: 9 additions & 3 deletions b/‎src/DecisionFocusedLearningBenchmarks.jl‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎src/FixedSizeShortestPath/FixedSizeShortestPath.jl‎
Lines changed: 4 additions & 1 deletion b/‎src/FixedSizeShortestPath/FixedSizeShortestPath.jl‎
Lines changed: 4 additions & 1 deletion
@@ -3,6 +3,7 @@
 data
 scripts
 *heuristic_algorithms
+.DS_Store
 
 # Files generated by invoking Julia with --code-coverage
 *.jl.cov
 
@@ -1,8 +1,19 @@
 # DecisionFocusedLearningBenchmarks.jl
 
+[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaDecisionFocusedLearning.github.io/DecisionFocusedLearningBenchmarks.jl/stable/)
 [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaDecisionFocusedLearning.github.io/DecisionFocusedLearningBenchmarks.jl/dev/)
 [![Build Status](https://github.com/JuliaDecisionFocusedLearning/DecisionFocusedLearningBenchmarks.jl/actions/workflows/Test.yml/badge.svg?branch=main)](https://github.com/JuliaDecisionFocusedLearning/DecisionFocusedLearningBenchmarks.jl/actions/workflows/Test.yml?query=branch%3Amain)
 [![Coverage](https://codecov.io/gh/JuliaDecisionFocusedLearning/DecisionFocusedLearningBenchmarks.jl/branch/main/graph/badge.svg)](https://app.codecov.io/gh/JuliaDecisionFocusedLearning/DecisionFocusedLearningBenchmarks.jl)
 [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/JuliaDiff/BlueStyle)
 
-Set of benchmark problems to be solved with [DecisionFocusedLearning.jl](https://github.com/JuliaDecisionFocusedLearning/DecisionFocusedLearning.jl)
+This repository contains a collection of benchmark problems for decision-focused learning algorithms.
+It provides a common interface for creating datasets, associated statistical models and combinatorial optimization maximizers for building decision-focused learning pipelines.
+They can be used for instance as benchmarks for tools in [InferOpt.jl](https://github.com/JuliaDecisionFocusedLearning/InferOpt.jl), but can be used in any other context as well.
+
+Currently, this package provides the following benchmark problems (many more to come!):
+- `SubsetSelectionBenchmark`: a minimalist subset selection problem.
+- `FixedSizeShortestPathBenchmark`: shortest path problem on a graph with fixed size.
+- `WarcraftBenchmark`: shortest path problem on image maps.
+- `PortfolioOptimizationBenchmark`: portfolio optimization problem.
+
+See the [documentation](https://JuliaDecisionFocusedLearning.github.io/DecisionFocusedLearningBenchmarks.jl/stable/) for more details.
@@ -0,0 +1,15 @@
+# Argmax
+
+## Public
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.Argmax]
+Private = false
+```
+
+## Private
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.Argmax]
+Public = false
+```
@@ -0,0 +1,15 @@
+# Ranking
+
+## Public
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.Ranking]
+Private = false
+```
+
+## Private
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.Ranking]
+Public = false
+```
@@ -25,10 +25,10 @@ train_dataset, test_dataset = dataset[1:45], dataset[46:50]
 sample = test_dataset[1]
 # `x` corresponds to the input features, i.e. the input image (3D array) in the Warcraft benchmark case:
 x = sample.x
-# `θ` correspond to the true unknown terrain weights. We use the opposite of the true weights in order to formulate the optimization problem as a maximization problem:
-θ_true = sample.θ
-# `y` correspond to the optimal shortest path, encoded as a binary matrix:
-y_true = sample.y
+# `θ_true` corresponds to the true unknown terrain weights. We use the opposite of the true weights in order to formulate the optimization problem as a maximization problem:
+θ_true = sample.θ_true
+# `y_true` corresponds to the optimal shortest path, encoded as a binary matrix:
+y_true = sample.y_true
 # `instance` is not used in this benchmark, therefore set to nothing:
 isnothing(sample.instance)
 
@@ -50,7 +50,7 @@ maximizer = generate_maximizer(b; dijkstra=true)
 # In the case of the Warcraft benchmark, the method has an additional keyword argument to choose the algorithm to use: Dijkstra's algorithm or Bellman-Ford algorithm.
 y = maximizer(θ)
 # As we can see, currently the pipeline predicts random noise as cell weights, and therefore the maximizer returns a straight line path.
-plot_data(b, DataSample(; x, θ, y))
+plot_data(b, DataSample(; x, θ_true=θ, y_true=y))
 # We can evaluate the current pipeline performance using the optimality gap metric:
 starting_gap = compute_gap(b, test_dataset, model, maximizer)
 
@@ -70,7 +70,7 @@ opt_state = Flux.setup(Adam(1e-3), model)
 loss_history = Float64[]
 for epoch in 1:50
     val, grads = Flux.withgradient(model) do m
-        sum(loss(m(sample.x), sample.y) for sample in train_dataset) / length(train_dataset)
+        sum(loss(m(x), y_true) for (; x, y_true) in train_dataset) / length(train_dataset)
     end
     Flux.update!(opt_state, model, grads[1])
     push!(loss_history, val)
@@ -85,4 +85,4 @@ final_gap = compute_gap(b, test_dataset, model, maximizer)
 #
 θ = model(x)
 y = maximizer(θ)
-plot_data(b, DataSample(; x, θ, y))
+plot_data(b, DataSample(; x, θ_true=θ, y_true=y))
@@ -0,0 +1,86 @@
+module Argmax
+
+using ..Utils
+using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
+using Flux: Chain, Dense
+using Random
+
+"""
+$TYPEDEF
+
+Benchmark problem with an argmax as the CO (combinatorial optimization) algorithm.
+
+The associated maximizer maps a vector of `instance_dim` scores to the one-hot encoding
+of its argmax.
+
+# Fields
+$TYPEDFIELDS
+"""
+struct ArgmaxBenchmark <: AbstractBenchmark
+    "instance dimension, i.e. total number of classes"
+    instance_dim::Int
+    "number of features describing each class"
+    nb_features::Int
+end
+
+# Compact one-line display listing both fields of the benchmark.
+function Base.show(io::IO, bench::ArgmaxBenchmark)
+    dim, feats = bench.instance_dim, bench.nb_features
+    return print(io, "ArgmaxBenchmark(instance_dim=$dim, nb_features=$feats)")
+end
+
+# Keyword constructor: 10 classes and 5 features unless stated otherwise.
+function ArgmaxBenchmark(; instance_dim::Int=10, nb_features::Int=5)
+    bench = ArgmaxBenchmark(instance_dim, nb_features)
+    return bench
+end
+
+"""
+$TYPEDSIGNATURES
+
+One-hot encoding of the argmax function.
+"""
+function one_hot_argmax(z::AbstractVector{R}; kwargs...) where {R<:Real}
+    # Start from an all-zero vector with the length and element type of `z` ...
+    onehot = fill(zero(R), length(z))
+    # ... and flag the position of the (first) maximum. `kwargs` are accepted and ignored.
+    onehot[argmax(z)] = one(R)
+    return onehot
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the maximizer of the argmax benchmark, i.e. the `one_hot_argmax` function, which
+maps a score vector to the one-hot encoding of its argmax.
+"""
+function Utils.generate_maximizer(bench::ArgmaxBenchmark)
+    return one_hot_argmax
+end
+
+"""
+$TYPEDSIGNATURES
+
+Generate a dataset of `dataset_size` labeled instances for the argmax problem.
+
+Each sample holds:
+- `x`: a `Float32` feature matrix of size `(nb_features, instance_dim)` with standard
+  normal entries;
+- `θ_true`: the cost vector obtained by applying one random linear mapping (a weight per
+  feature, no bias, shared by all samples) to every column of `x`;
+- `y_true`: the one-hot encoding of the argmax of `θ_true`.
+
+Both the features and the weights of the mapping are drawn from a `MersenneTwister`
+seeded with `seed`, so the dataset depends only on the arguments.
+"""
+function Utils.generate_dataset(bench::ArgmaxBenchmark, dataset_size::Int=10; seed::Int=0)
+    (; instance_dim, nb_features) = bench
+    rng = MersenneTwister(seed)
+    features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size]
+    # Draw the weights from `rng` instead of relying on the default initializer of
+    # `Dense`, which samples from the global RNG and would therefore ignore `seed`.
+    # The formula is the Glorot uniform scheme `Dense` uses by default, written out for
+    # a 1 × nb_features matrix. The weights are drawn after the features so that `x` is
+    # unchanged for a given seed.
+    scale = sqrt(24.0f0 / (nb_features + 1))
+    weights = (rand(rng, Float32, 1, nb_features) .- 0.5f0) .* scale
+    mapping = Chain(Dense(weights, false), vec)
+    costs = mapping.(features)
+    solutions = one_hot_argmax.(costs)
+    return [
+        DataSample(; x, θ_true, y_true) for
+        (x, θ_true, y_true) in zip(features, costs, solutions)
+    ]
+end
+
+"""
+$TYPEDSIGNATURES
+
+Build the `Flux` model for `bench`: a bias-free `Dense` layer mapping `nb_features`
+inputs to a single output, followed by `vec`.
+
+The global random number generator is reseeded with `seed` before the layer is created.
+"""
+function Utils.generate_statistical_model(bench::ArgmaxBenchmark; seed=0)
+    Random.seed!(seed)
+    scorer = Dense(bench.nb_features => 1; bias=false)
+    return Chain(scorer, vec)
+end
+
+export ArgmaxBenchmark
+
+end
@@ -23,18 +23,22 @@ end
 
 include("Utils/Utils.jl")
 
+include("Argmax/Argmax.jl")
+include("Ranking/Ranking.jl")
+include("SubsetSelection/SubsetSelection.jl")
 include("Warcraft/Warcraft.jl")
 include("FixedSizeShortestPath/FixedSizeShortestPath.jl")
 include("PortfolioOptimization/PortfolioOptimization.jl")
-include("SubsetSelection/SubsetSelection.jl")
 
 include("StochasticVehicleScheduling/StochasticVehicleScheduling.jl")
 
 using .Utils
+using .Argmax
+using .Ranking
+using .SubsetSelection
 using .Warcraft
 using .FixedSizeShortestPath
 using .PortfolioOptimization
-using .SubsetSelection
 using .StochasticVehicleScheduling
 
 # Interface
@@ -46,10 +50,12 @@ export plot_data
 export compute_gap
 
 # Export all benchmarks
+export ArgmaxBenchmark
+export RankingBenchmark
+export SubsetSelectionBenchmark
 export WarcraftBenchmark
 export FixedSizeShortestPathBenchmark
 export PortfolioOptimizationBenchmark
-export SubsetSelectionBenchmark
 export StochasticVehicleSchedulingBenchmark
 
 end # module DecisionFocusedLearningBenchmarks
@@ -132,7 +132,10 @@ function Utils.generate_dataset(
 
     # Label solutions
     solutions = shortest_path_maximizer.(costs)
-    return [DataSample(; x=x, θ=θ, y=y) for (x, θ, y) in zip(features, costs, solutions)]
+    return [
+        DataSample(; x, θ_true, y_true) for
+        (x, θ_true, y_true) in zip(features, costs, solutions)
+    ]
 end
 
 """