From 86777cab8e54fc542002ebf797a61caa99495e30 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 17 Oct 2024 17:30:23 +0200 Subject: [PATCH 1/7] start adding some description in the documentation for each benchmark problem --- docs/make.jl | 8 +++++++- docs/src/benchmarks/fixed_size_shortest_path.md | 3 +++ docs/src/benchmarks/portfolio_optimization.md | 3 +++ docs/src/benchmarks/subset_selection.md | 9 +++++++++ docs/src/benchmarks/warcraft.md | 3 +++ 5 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 docs/src/benchmarks/fixed_size_shortest_path.md create mode 100644 docs/src/benchmarks/portfolio_optimization.md create mode 100644 docs/src/benchmarks/subset_selection.md create mode 100644 docs/src/benchmarks/warcraft.md diff --git a/docs/make.jl b/docs/make.jl index 6a818c8..f58d619 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -21,7 +21,13 @@ makedocs(; format=Documenter.HTML(), pages=[ "Home" => "index.md", - "Tutorials" => md_tutorial_files, + # "Tutorials" => md_tutorial_files, + "Benchmark problems list" => [ + "benchmarks/subset_selection.md", + "benchmarks/portfolio_optimization.md", + "benchmarks/fixed_size_shortest_path.md", + "benchmarks/warcraft.md", + ], "API reference" => ["api/interface.md", "api/decision_focused.md", "api/warcraft.md"], ], diff --git a/docs/src/benchmarks/fixed_size_shortest_path.md b/docs/src/benchmarks/fixed_size_shortest_path.md new file mode 100644 index 0000000..de4fbc3 --- /dev/null +++ b/docs/src/benchmarks/fixed_size_shortest_path.md @@ -0,0 +1,3 @@ +# Shortest paths + +[`FixedSizeShortestPathBenchmark`](@ref) diff --git a/docs/src/benchmarks/portfolio_optimization.md b/docs/src/benchmarks/portfolio_optimization.md new file mode 100644 index 0000000..9fd59b8 --- /dev/null +++ b/docs/src/benchmarks/portfolio_optimization.md @@ -0,0 +1,3 @@ +# Portfolio Optimization + +[`PortfolioOptimizationBenchmark`](@ref) diff --git a/docs/src/benchmarks/subset_selection.md b/docs/src/benchmarks/subset_selection.md new file 
mode 100644 index 0000000..ac5886a --- /dev/null +++ b/docs/src/benchmarks/subset_selection.md @@ -0,0 +1,9 @@ +# Subset Selection + +[`SubsetSelectionBenchmark`](@ref) is a very simple benchmark problem of subset selection. + +We have a set of ``n`` items, each item having an `unknown' value. +We want to select a subset of ``k`` items that maximizes the sum of the values of the selected items. + +As input, we are given a feature vector, that contains exactly the value of each item. +The goal is to learn the identity mapping between the feature vector and the value of the items. diff --git a/docs/src/benchmarks/warcraft.md b/docs/src/benchmarks/warcraft.md new file mode 100644 index 0000000..3105a40 --- /dev/null +++ b/docs/src/benchmarks/warcraft.md @@ -0,0 +1,3 @@ +# Warcraft + +[`WarcraftBenchmark`](@ref) \ No newline at end of file From 7b47d135dbc58315ccd511eb88ee98d624e2c0de Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 25 Oct 2024 17:05:50 +0200 Subject: [PATCH 2/7] new optional parameter for subset selection --- docs/make.jl | 20 +++++++++++++------- docs/src/benchmarks/subset_selection.md | 12 ++++++++---- src/SubsetSelection/SubsetSelection.jl | 14 +++++++++++--- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index f58d619..e3b3928 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -9,9 +9,13 @@ tutorial_dir = joinpath(@__DIR__, "src", "tutorials") tutorial_files = readdir(tutorial_dir) md_tutorial_files = [split(file, ".")[1] * ".md" for file in tutorial_files] -for file in tutorial_files - filepath = joinpath(tutorial_dir, file) - Literate.markdown(filepath, md_dir; documenter=true, execute=false) +include_tutorial = false + +if include_tutorial + for file in tutorial_files + filepath = joinpath(tutorial_dir, file) + Literate.markdown(filepath, md_dir; documenter=true, execute=false) + end end makedocs(; @@ -21,7 +25,7 @@ makedocs(; format=Documenter.HTML(), pages=[ "Home" => "index.md", - # "Tutorials" => 
md_tutorial_files, + "Tutorials" => include_tutorial ? md_tutorial_files : [], "Benchmark problems list" => [ "benchmarks/subset_selection.md", "benchmarks/portfolio_optimization.md", @@ -33,9 +37,11 @@ makedocs(; ], ) -for file in md_tutorial_files - filepath = joinpath(md_dir, file) - rm(filepath) +if include_tutorial + for file in md_tutorial_files + filepath = joinpath(md_dir, file) + rm(filepath) + end end deploydocs(; diff --git a/docs/src/benchmarks/subset_selection.md b/docs/src/benchmarks/subset_selection.md index ac5886a..7415ed9 100644 --- a/docs/src/benchmarks/subset_selection.md +++ b/docs/src/benchmarks/subset_selection.md @@ -1,9 +1,13 @@ # Subset Selection -[`SubsetSelectionBenchmark`](@ref) is a very simple benchmark problem of subset selection. +[`SubsetSelectionBenchmark`](@ref) is the most trivial benchmark problem in this package. +It is minimalistic and serves as a simple example for debugging and testing purposes. -We have a set of ``n`` items, each item having an `unknown' value. +## Description +We have a set of ``n`` items, each item having an unknown value. We want to select a subset of ``k`` items that maximizes the sum of the values of the selected items. -As input, we are given a feature vector, that contains exactly the value of each item. -The goal is to learn the identity mapping between the feature vector and the value of the items. +As input, instead of the items costs, we are given a feature vector, such that an unknown linear mapping between the feature vector and the value of the items exists. + +By default, this linear mapping is the identity mapping, i.e., the value of each item is equal to the value of the corresponding feature vector element. +However, this mapping can be changed by setting the `identity_mapping` parameter to false. 
diff --git a/src/SubsetSelection/SubsetSelection.jl b/src/SubsetSelection/SubsetSelection.jl index cc32190..64d353d 100644 --- a/src/SubsetSelection/SubsetSelection.jl +++ b/src/SubsetSelection/SubsetSelection.jl @@ -58,13 +58,21 @@ Generate a dataset of labeled instances for the subset selection problem. The mapping between features and cost is identity. """ function Utils.generate_dataset( - bench::SubsetSelectionBenchmark, dataset_size::Int=10; seed::Int=0 + bench::SubsetSelectionBenchmark, + dataset_size::Int=10; + seed::Int=0, + identity_mapping=true, ) (; n, k) = bench rng = MersenneTwister(seed) features = [randn(rng, Float32, n) for _ in 1:dataset_size] - costs = copy(features) # we assume that the cost is the same as the feature - solutions = top_k.(features, k) + costs = if identity_mapping + copy(features) # we assume that the cost is the same as the feature + else + mapping = Dense(n => n; bias=false) + mapping.(features) + end + solutions = top_k.(costs, k) return [DataSample(; x=x, θ=θ, y=y) for (x, θ, y) in zip(features, costs, solutions)] end From 333fe81a41edcf09cd965c92ddfab53c07b1c57f Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 25 Oct 2024 17:24:15 +0200 Subject: [PATCH 3/7] update --- docs/make.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index e3b3928..431e05a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -6,10 +6,14 @@ cp(joinpath(@__DIR__, "..", "README.md"), joinpath(@__DIR__, "src", "index.md"); md_dir = joinpath(@__DIR__, "src") tutorial_dir = joinpath(@__DIR__, "src", "tutorials") +benchmarks_dir = joinpath(@__DIR__, "src", "benchmarks") + tutorial_files = readdir(tutorial_dir) md_tutorial_files = [split(file, ".")[1] * ".md" for file in tutorial_files] +benchmark_files = readdir(benchmarks_dir) +md_benchmark_files = [split(file, ".")[1] * ".md" for file in benchmark_files] -include_tutorial = false +include_tutorial = true if include_tutorial for file in tutorial_files From 
99aacfb2db17d2d8095df6209601801d8c94f20a Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 7 Nov 2024 12:33:30 +0100 Subject: [PATCH 4/7] update --- docs/make.jl | 2 +- docs/src/benchmarks/fixed_size_shortest_path.md | 3 ++- docs/src/benchmarks/portfolio_optimization.md | 11 ++++++++++- docs/src/benchmarks/subset_selection.md | 2 +- docs/src/benchmarks/warcraft.md | 2 +- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 431e05a..16461f0 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -32,9 +32,9 @@ makedocs(; "Tutorials" => include_tutorial ? md_tutorial_files : [], "Benchmark problems list" => [ "benchmarks/subset_selection.md", - "benchmarks/portfolio_optimization.md", "benchmarks/fixed_size_shortest_path.md", "benchmarks/warcraft.md", + "benchmarks/portfolio_optimization.md", ], "API reference" => ["api/interface.md", "api/decision_focused.md", "api/warcraft.md"], diff --git a/docs/src/benchmarks/fixed_size_shortest_path.md b/docs/src/benchmarks/fixed_size_shortest_path.md index de4fbc3..f483587 100644 --- a/docs/src/benchmarks/fixed_size_shortest_path.md +++ b/docs/src/benchmarks/fixed_size_shortest_path.md @@ -1,3 +1,4 @@ # Shortest paths -[`FixedSizeShortestPathBenchmark`](@ref) +[`FixedSizeShortestPathBenchmark`](@ref) is a benchmark problem that consists of finding the shortest path in a grid graph between the top left and bottom right corners. +In this benchmark, the grid size is the same for all instances. diff --git a/docs/src/benchmarks/portfolio_optimization.md b/docs/src/benchmarks/portfolio_optimization.md index 9fd59b8..aef8225 100644 --- a/docs/src/benchmarks/portfolio_optimization.md +++ b/docs/src/benchmarks/portfolio_optimization.md @@ -1,3 +1,12 @@ # Portfolio Optimization -[`PortfolioOptimizationBenchmark`](@ref) +[`PortfolioOptimizationBenchmark`](@ref) is a Markowitz portfolio optimization problem, where asset prices are unknown, and only contextual data is available to predict these prices. 
+The goal is to predict asset prices $c$ and maximize the expected return of a portfolio, subject to a risk constraint using this maximization program: +```math +\begin{aligned} +\max\quad & c^\top x\\ +\text{s.t.}\quad & x^\top \Sigma x \leq \gamma\\ +& 1^\top x \leq 1\\ +& x \geq 0 +\end{aligned} +``` diff --git a/docs/src/benchmarks/subset_selection.md b/docs/src/benchmarks/subset_selection.md index 7415ed9..918e424 100644 --- a/docs/src/benchmarks/subset_selection.md +++ b/docs/src/benchmarks/subset_selection.md @@ -10,4 +10,4 @@ We want to select a subset of ``k`` items that maximizes the sum of the values o As input, instead of the items costs, we are given a feature vector, such that an unknown linear mapping between the feature vector and the value of the items exists. By default, this linear mapping is the identity mapping, i.e., the value of each item is equal to the value of the corresponding feature vector element. -However, this mapping can be changed by setting the `identity_mapping` parameter to false. +However, this mapping can be changed by setting the `identity_mapping` parameter to `false`. diff --git a/docs/src/benchmarks/warcraft.md b/docs/src/benchmarks/warcraft.md index 3105a40..c78850e 100644 --- a/docs/src/benchmarks/warcraft.md +++ b/docs/src/benchmarks/warcraft.md @@ -1,3 +1,3 @@ # Warcraft -[`WarcraftBenchmark`](@ref) \ No newline at end of file +See the tutorial for a full demo of [`WarcraftBenchmark`](@ref). 
From a05fdd41de0a7f42883510d28a537d14806cb264 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 7 Nov 2024 13:44:32 +0100 Subject: [PATCH 5/7] fix coverage --- test/subset_selection.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/subset_selection.jl b/test/subset_selection.jl index 6cfdcbb..cd03b52 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -5,12 +5,14 @@ k = 5 b = SubsetSelectionBenchmark(; n=n, k=k) + b2 = SubsetSelectionBenchmark(; n=n, k=k, identity_mapping=false) io = IOBuffer() show(io, b) @test String(take!(io)) == "SubsetSelectionBenchmark(n=25, k=5)" dataset = generate_dataset(b, 50) + dataset2 = generate_dataset(b2, 50) model = generate_statistical_model(b) maximizer = generate_maximizer(b) From 37c936e487494293ce749609b9125bb5386e15ed Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 7 Nov 2024 13:57:28 +0100 Subject: [PATCH 6/7] fix tests --- test/subset_selection.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/subset_selection.jl b/test/subset_selection.jl index cd03b52..55ae106 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -5,14 +5,13 @@ k = 5 b = SubsetSelectionBenchmark(; n=n, k=k) - b2 = SubsetSelectionBenchmark(; n=n, k=k, identity_mapping=false) io = IOBuffer() show(io, b) @test String(take!(io)) == "SubsetSelectionBenchmark(n=25, k=5)" dataset = generate_dataset(b, 50) - dataset2 = generate_dataset(b2, 50) + dataset2 = generate_dataset(b, 50; identity_mappinf=false) model = generate_statistical_model(b) maximizer = generate_maximizer(b) From 4f83cbdfa848753aef02050483360e83fa9cbe70 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 7 Nov 2024 14:16:05 +0100 Subject: [PATCH 7/7] fix tests again --- test/subset_selection.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/subset_selection.jl b/test/subset_selection.jl index 55ae106..91db249 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -11,7 +11,7 @@ @test 
String(take!(io)) == "SubsetSelectionBenchmark(n=25, k=5)" dataset = generate_dataset(b, 50) - dataset2 = generate_dataset(b, 50; identity_mappinf=false) + dataset2 = generate_dataset(b, 50; identity_mapping=false) model = generate_statistical_model(b) maximizer = generate_maximizer(b)