diff --git a/.gitignore b/.gitignore
index 69ae44f..f65c259 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 /docs/src/index.md
 data
 scripts
+.DS_Store
 
 # Files generated by invoking Julia with --code-coverage
 *.jl.cov
diff --git a/docs/src/api/argmax.md b/docs/src/api/argmax.md
new file mode 100644
index 0000000..6ea12e4
--- /dev/null
+++ b/docs/src/api/argmax.md
@@ -0,0 +1,15 @@
+# Argmax
+
+## Public
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.Argmax]
+Private = false
+```
+
+## Private
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.Argmax]
+Public = false
+```
diff --git a/docs/src/api/ranking.md b/docs/src/api/ranking.md
new file mode 100644
index 0000000..f249a48
--- /dev/null
+++ b/docs/src/api/ranking.md
@@ -0,0 +1,15 @@
+# Ranking
+
+## Public
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.Ranking]
+Private = false
+```
+
+## Private
+
+```@autodocs
+Modules = [DecisionFocusedLearningBenchmarks.Ranking]
+Public = false
+```
diff --git a/docs/src/benchmarks/argmax.md b/docs/src/benchmarks/argmax.md
new file mode 100644
index 0000000..e69de29
diff --git a/docs/src/benchmarks/ranking.md b/docs/src/benchmarks/ranking.md
new file mode 100644
index 0000000..e69de29
diff --git a/src/Argmax/Argmax.jl b/src/Argmax/Argmax.jl
new file mode 100644
index 0000000..d44e6e1
--- /dev/null
+++ b/src/Argmax/Argmax.jl
@@ -0,0 +1,86 @@
+module Argmax
+
+using ..Utils
+using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
+using Flux: Chain, Dense
+using Random
+
+"""
+$TYPEDEF
+
+Benchmark problem with an argmax as the CO algorithm.
+
+# Fields
+$TYPEDFIELDS
+"""
+struct ArgmaxBenchmark <: AbstractBenchmark
+    "instance dimension, total number of classes"
+    instance_dim::Int
+    "number of features"
+    nb_features::Int
+end
+
+function Base.show(io::IO, bench::ArgmaxBenchmark)
+    (; instance_dim, nb_features) = bench
+    return print(
+        io, "ArgmaxBenchmark(instance_dim=$instance_dim, nb_features=$nb_features)"
+    )
+end
+
+function ArgmaxBenchmark(; instance_dim::Int=10, nb_features::Int=5)
+    return ArgmaxBenchmark(instance_dim, nb_features)
+end
+
+"""
+$TYPEDSIGNATURES
+
+One-hot encoding of the argmax function.
+"""
+function one_hot_argmax(z::AbstractVector{R}; kwargs...) where {R<:Real}
+    e = zeros(R, length(z))
+    e[argmax(z)] = one(R)
+    return e
+end
+
+"""
+$TYPEDSIGNATURES
+
+Return the one-hot argmax maximizer.
+"""
+function Utils.generate_maximizer(bench::ArgmaxBenchmark)
+    return one_hot_argmax
+end
+
+"""
+$TYPEDSIGNATURES
+
+Generate a dataset of labeled instances for the argmax problem.
+The mapping between features and costs is a fixed random linear model.
+"""
+function Utils.generate_dataset(bench::ArgmaxBenchmark, dataset_size::Int=10; seed::Int=0)
+    (; instance_dim, nb_features) = bench
+    rng = MersenneTwister(seed)
+    features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size]
+    mapping = Chain(Dense(nb_features => 1; bias=false), vec)
+    costs = mapping.(features)
+    solutions = one_hot_argmax.(costs)
+    return [
+        DataSample(; x, θ_true, y_true) for
+        (x, θ_true, y_true) in zip(features, costs, solutions)
+    ]
+end
+
+"""
+$TYPEDSIGNATURES
+
+Initialize a linear model for `bench` using `Flux`.
+""" +function Utils.generate_statistical_model(bench::ArgmaxBenchmark; seed=0) + Random.seed!(seed) + (; nb_features) = bench + return Chain(Dense(nb_features => 1; bias=false), vec) +end + +export ArgmaxBenchmark + +end diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 071a15c..21265e3 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -19,16 +19,20 @@ end include("Utils/Utils.jl") +include("Argmax/Argmax.jl") +include("Ranking/Ranking.jl") +include("SubsetSelection/SubsetSelection.jl") include("Warcraft/Warcraft.jl") include("FixedSizeShortestPath/FixedSizeShortestPath.jl") include("PortfolioOptimization/PortfolioOptimization.jl") -include("SubsetSelection/SubsetSelection.jl") using .Utils +using .Argmax +using .Ranking +using .SubsetSelection using .Warcraft using .FixedSizeShortestPath using .PortfolioOptimization -using .SubsetSelection # Interface export AbstractBenchmark, DataSample @@ -39,9 +43,11 @@ export plot_data export compute_gap # Export all benchmarks +export ArgmaxBenchmark +export RankingBenchmark +export SubsetSelectionBenchmark export WarcraftBenchmark export FixedSizeShortestPathBenchmark export PortfolioOptimizationBenchmark -export SubsetSelectionBenchmark end # module DecisionFocusedLearningBenchmarks diff --git a/src/Ranking/Ranking.jl b/src/Ranking/Ranking.jl new file mode 100644 index 0000000..bbce752 --- /dev/null +++ b/src/Ranking/Ranking.jl @@ -0,0 +1,84 @@ +module Ranking + +using ..Utils +using DocStringExtensions: TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES +using Flux: Chain, Dense +using Random + +""" +$TYPEDEF + +Benchmark problem with an argmax as the CO algorithm. + +# Fields +$TYPEDFIELDS +""" +struct RankingBenchmark <: AbstractBenchmark + "iinstances dimension, total number of classes" + instance_dim::Int + "number of features" + nb_features::Int +end + +function Base.show(io::IO, bench::RankingBenchmark) + (; instance_dim, nb_features) = bench + return print( + io, "RankingBenchmark(instance_dim=$instance_dim, nb_features=$nb_features)" + ) +end + +function RankingBenchmark(; instance_dim::Int=10, nb_features::Int=5) + return RankingBenchmark(instance_dim, nb_features) +end + +""" +$TYPEDSIGNATURES + +Compute the vector `r` such that `rᵢ` is the rank of `θᵢ` in `θ`. +""" +function ranking(θ::AbstractVector; rev::Bool=false, kwargs...) + return invperm(sortperm(θ; rev=rev)) +end + +""" +$TYPEDSIGNATURES + +Return a top k maximizer. +""" +function Utils.generate_maximizer(bench::RankingBenchmark) + return ranking +end + +""" +$TYPEDSIGNATURES + +Generate a dataset of labeled instances for the subset selection problem. +The mapping between features and cost is identity. +""" +function Utils.generate_dataset(bench::RankingBenchmark, dataset_size::Int=10; seed::Int=0) + (; instance_dim, nb_features) = bench + rng = MersenneTwister(seed) + features = [randn(rng, Float32, nb_features, instance_dim) for _ in 1:dataset_size] + mapping = Chain(Dense(nb_features => 1; bias=false), vec) + costs = mapping.(features) + solutions = ranking.(costs) + return [ + DataSample(; x, θ_true, y_true) for + (x, θ_true, y_true) in zip(features, costs, solutions) + ] +end + +""" +$TYPEDSIGNATURES + +Initialize a linear model for `bench` using `Flux`. 
+""" +function Utils.generate_statistical_model(bench::RankingBenchmark; seed=0) + Random.seed!(seed) + (; nb_features) = bench + return Chain(Dense(nb_features => 1; bias=false), vec) +end + +export RankingBenchmark + +end diff --git a/test/argmax.jl b/test/argmax.jl new file mode 100644 index 0000000..28f404b --- /dev/null +++ b/test/argmax.jl @@ -0,0 +1,31 @@ +@testitem "Argmax" begin + using DecisionFocusedLearningBenchmarks + + instance_dim = 10 + nb_features = 5 + + b = ArgmaxBenchmark(; instance_dim=instance_dim, nb_features=nb_features) + + io = IOBuffer() + show(io, b) + @test String(take!(io)) == "ArgmaxBenchmark(instance_dim=10, nb_features=5)" + + dataset = generate_dataset(b, 50) + model = generate_statistical_model(b) + maximizer = generate_maximizer(b) + + for (i, sample) in enumerate(dataset) + (; x, θ_true, y_true) = sample + @test size(x) == (nb_features, instance_dim) + @test length(θ_true) == instance_dim + @test length(y_true) == instance_dim + @test isnothing(sample.instance) + @test all(y_true .== maximizer(θ_true)) + + θ = model(x) + @test length(θ) == instance_dim + + y = maximizer(θ) + @test length(y) == instance_dim + end +end diff --git a/test/ranking.jl b/test/ranking.jl new file mode 100644 index 0000000..1756733 --- /dev/null +++ b/test/ranking.jl @@ -0,0 +1,31 @@ +@testitem "Ranking" begin + using DecisionFocusedLearningBenchmarks + + instance_dim = 10 + nb_features = 5 + + b = RankingBenchmark(; instance_dim=instance_dim, nb_features=nb_features) + + io = IOBuffer() + show(io, b) + @test String(take!(io)) == "RankingBenchmark(instance_dim=10, nb_features=5)" + + dataset = generate_dataset(b, 50) + model = generate_statistical_model(b) + maximizer = generate_maximizer(b) + + for (i, sample) in enumerate(dataset) + (; x, θ_true, y_true) = sample + @test size(x) == (nb_features, instance_dim) + @test length(θ_true) == instance_dim + @test length(y_true) == instance_dim + @test isnothing(sample.instance) + @test all(y_true .== maximizer(θ_true)) + + θ = model(x) + @test length(θ) == instance_dim + + y = maximizer(θ) + @test length(y) == instance_dim + end +end