Merge branch 'main' into StoVSP

BatyLeo · BatyLeo · commit 7e26cc0ba754 · 2024-12-23T13:56:16.000+01:00
diff --git a/src/Argmax/Argmax.jl b/src/Argmax/Argmax.jl
@@ -8,16 +8,18 @@ using Random
 """
 $TYPEDEF
 
-Benchmark problem with an argmax as the CO algorithm.
+Basic benchmark problem with an argmax as the CO algorithm.
 
 # Fields
 $TYPEDFIELDS
 """
-struct ArgmaxBenchmark <: AbstractBenchmark
-    "iinstances dimension, total number of classes"
+struct ArgmaxBenchmark{E} <: AbstractBenchmark
+    "instances dimension, total number of classes"
     instance_dim::Int
     "number of features"
     nb_features::Int
+    "true mapping between features and costs"
+    encoder::E
 end
 
 function Base.show(io::IO, bench::ArgmaxBenchmark)
@@ -27,8 +29,15 @@ function Base.show(io::IO, bench::ArgmaxBenchmark)
     )
 end
 
-function ArgmaxBenchmark(; instance_dim::Int=10, nb_features::Int=5)
-    return ArgmaxBenchmark(instance_dim, nb_features)
+"""
+$TYPEDSIGNATURES
+
+Custom constructor for [`ArgmaxBenchmark`](@ref).
+"""
+function ArgmaxBenchmark(; instance_dim::Int=10, nb_features::Int=5, seed=nothing)
+    Random.seed!(seed)
+    model = Chain(Dense(nb_features => 1; bias=false), vec)
+    return ArgmaxBenchmark(instance_dim, nb_features, model)
 end
 
 """
@@ -45,7 +54,7 @@ end
 """
 $TYPEDSIGNATURES
 
-Return a top k maximizer.
+Return an argmax maximizer.
 """
 function Utils.generate_maximizer(bench::ArgmaxBenchmark)
     return one_hot_argmax
@@ -54,19 +63,21 @@ end
 """
 $TYPEDSIGNATURES
 
-Generate a dataset of labeled instances for the subset selection problem.
-The mapping between features and cost is identity.
+Generate a dataset of labeled instances for the argmax problem.
 """
-function Utils.generate_dataset(bench::ArgmaxBenchmark, dataset_size::Int=10; seed::Int=0)
-    (; instance_dim, nb_features) = bench
+function Utils.generate_dataset(
+    bench::ArgmaxBenchmark, dataset_size::Int=10; seed::Int=0, noise_std=0.0
+)
+    (; instance_dim, nb_features, encoder) = bench
     rng = MersenneTwister(seed)
     features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size]
-    mapping = Chain(Dense(nb_features => 1; bias=false), vec)
-    costs = mapping.(features)
-    solutions = one_hot_argmax.(costs)
+    costs = encoder.(features)
+    noisy_solutions = [
+        one_hot_argmax(θ + noise_std * randn(rng, Float32, instance_dim)) for θ in costs
+    ]
     return [
         DataSample(; x, θ_true, y_true) for
-        (x, θ_true, y_true) in zip(features, costs, solutions)
+        (x, θ_true, y_true) in zip(features, costs, noisy_solutions)
     ]
 end
 
@@ -75,9 +86,9 @@ $TYPEDSIGNATURES
 
 Initialize a linear model for `bench` using `Flux`.
 """
-function Utils.generate_statistical_model(bench::ArgmaxBenchmark; seed=0)
-    Random.seed!(seed)
+function Utils.generate_statistical_model(bench::ArgmaxBenchmark; seed=nothing)
     (; nb_features) = bench
+    Random.seed!(seed)
     return Chain(Dense(nb_features => 1; bias=false), vec)
 end
 
diff --git a/src/Ranking/Ranking.jl b/src/Ranking/Ranking.jl
@@ -8,16 +8,18 @@ using Random
 """
 $TYPEDEF
 
-Benchmark problem with an argmax as the CO algorithm.
+Basic benchmark problem with ranking as the CO algorithm.
 
 # Fields
 $TYPEDFIELDS
 """
-struct RankingBenchmark <: AbstractBenchmark
-    "iinstances dimension, total number of classes"
+struct RankingBenchmark{E} <: AbstractBenchmark
+    "instances dimension, total number of classes"
     instance_dim::Int
     "number of features"
     nb_features::Int
+    "true mapping between features and costs"
+    encoder::E
 end
 
 function Base.show(io::IO, bench::RankingBenchmark)
@@ -27,8 +29,15 @@ function Base.show(io::IO, bench::RankingBenchmark)
     )
 end
 
-function RankingBenchmark(; instance_dim::Int=10, nb_features::Int=5)
-    return RankingBenchmark(instance_dim, nb_features)
+"""
+$TYPEDSIGNATURES
+
+Custom constructor for [`RankingBenchmark`](@ref).
+"""
+function RankingBenchmark(; instance_dim::Int=10, nb_features::Int=5, seed=nothing)
+    Random.seed!(seed)
+    model = Chain(Dense(nb_features => 1; bias=false), vec)
+    return RankingBenchmark(instance_dim, nb_features, model)
 end
 
 """
@@ -43,7 +52,7 @@ end
 """
 $TYPEDSIGNATURES
 
-Return a top k maximizer.
+Return a ranking maximizer.
 """
 function Utils.generate_maximizer(bench::RankingBenchmark)
     return ranking
@@ -52,19 +61,21 @@ end
 """
 $TYPEDSIGNATURES
 
-Generate a dataset of labeled instances for the subset selection problem.
-The mapping between features and cost is identity.
+Generate a dataset of labeled instances for the ranking problem.
 """
-function Utils.generate_dataset(bench::RankingBenchmark, dataset_size::Int=10; seed::Int=0)
-    (; instance_dim, nb_features) = bench
+function Utils.generate_dataset(
+    bench::RankingBenchmark, dataset_size::Int=10; seed::Int=0, noise_std=0.0
+)
+    (; instance_dim, nb_features, encoder) = bench
     rng = MersenneTwister(seed)
     features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size]
-    mapping = Chain(Dense(nb_features => 1; bias=false), vec)
-    costs = mapping.(features)
-    solutions = ranking.(costs)
+    costs = encoder.(features)
+    noisy_solutions = [
+        ranking(θ .+ noise_std * randn(rng, Float32, instance_dim)) for θ in costs
+    ]
     return [
         DataSample(; x, θ_true, y_true) for
-        (x, θ_true, y_true) in zip(features, costs, solutions)
+        (x, θ_true, y_true) in zip(features, costs, noisy_solutions)
     ]
 end
 
@@ -73,9 +84,9 @@ $TYPEDSIGNATURES
 
 Initialize a linear model for `bench` using `Flux`.
 """
-function Utils.generate_statistical_model(bench::RankingBenchmark; seed=0)
-    Random.seed!(seed)
+function Utils.generate_statistical_model(bench::RankingBenchmark; seed=nothing)
     (; nb_features) = bench
+    Random.seed!(seed)
     return Chain(Dense(nb_features => 1; bias=false), vec)
 end