diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index 042b468347..acd76178c8 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,2 +1,2 @@ style = "blue" -ignore = ["tutorials"] \ No newline at end of file +ignore = ["tutorials", ".git"] \ No newline at end of file diff --git a/Changelog.md b/Changelog.md index f23dc95302..0bbe45d1c3 100644 --- a/Changelog.md +++ b/Changelog.md @@ -7,9 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.5.5] unreleased +### Added + +* the Levenberg-Marquardt algorithm internally uses a `VectorGradientFunction`, which allows + to use either a vector of gradient functions or a single function returning all gradients for the algorithm +* The `VectorGradientFunction` now also has a `get_jacobian` function + ### Changed * Minimum Julia version is now 1.10 (the LTS which replaced 1.6) +* The vectorial functions had a bug where the original vector function for the mutating case + was not always treated as mutating. ### Removed @@ -20,7 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added * An automated detection whether the tutorials are present - if not an also no quarto run is done, an automated `--exlcude-tutorials` option is added. + if not, no quarto run is done and an automated `--exclude-tutorials` option is added. * Support for ManifoldDiff 0.4 * icons upfront external links when they link to another package or wikipedia. @@ -32,7 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -* stabilize `max_Stepzise` to also work when `injectivity_radius` dos not exist. +* stabilize `max_stepsize` to also work when `injectivity_radius` does not exist. It however would warn new users, that activate tutorial mode. * Start a `ManoptTestSuite` subpackage to store dummy types and common test helpers in. @@ -71,8 +79,8 @@ In general we introduce a few factories, that avoid having to pass the manifold ### Changed -* Any `Stepsize` now hase a `Stepsize` struct used internally as the original `struct`s before. The newly exported terms aim to fit `stepsize=...` in naming and create a `ManifoldDefaultsFactory` instead, so that any stepsize can be created without explicitly specifying the manifold. - * `ConstantStepsize` is no longer exported, use `ConstantLength` instead. The length parameter is now a positional argument following the (optonal) manifold. Besides that `ConstantLength` works as before,just that omitting the manifold fills the one specified in the solver now. +* Any `Stepsize` now has a `Stepsize` struct used internally as the original `struct`s before. The newly exported terms aim to fit `stepsize=...` in naming and create a `ManifoldDefaultsFactory` instead, so that any stepsize can be created without explicitly specifying the manifold. + * `ConstantStepsize` is no longer exported, use `ConstantLength` instead. The length parameter is now a positional argument following the (optional) manifold. Besides that `ConstantLength` works as before, just that omitting the manifold fills the one specified in the solver now. * `DecreasingStepsize` is no longer exported, use `DecreasingLength` instead. `ConstantLength` works as before, just that omitting the manifold fills the one specified in the solver now. * `ArmijoLinesearch` is now called `ArmijoLinesearchStepsize`. `ArmijoLinesearch` works as before, just that omitting the manifold fills the one specified in the solver now. 
* `WolfePowellLinesearch` is now called `WolfePowellLinesearchStepsize`, its constant `c_1` is now unified with Armijo and called `sufficient_decrease`, `c_2` was renamed to `sufficient_curvature`. Besides that, `WolfePowellLinesearch` works as before, just that omitting the manifold fills the one specified in the solver now. @@ -134,7 +142,7 @@ In general we introduce a few factories, that avoid having to pass the manifold * `ExactPenaltyMethodState(M, sub_problem; evaluation=...)` was added and `ExactPenaltyMethodState(M, sub_problem, sub_state; evaluation=...)` now has `p=rand(M)` as keyword argument instead of being the second positional one * `DifferenceOfConvexState(M, sub_problem; evaluation=...)` was added and `DifferenceOfConvexState(M, sub_problem, sub_state; evaluation=...)` now has `p=rand(M)` as keyword argument instead of being the second positional one * `DifferenceOfConvexProximalState(M, sub_problem; evaluation=...)` was added and `DifferenceOfConvexProximalState(M, sub_problem, sub_state; evaluation=...)` now has `p=rand(M)` as keyword argument instead of being the second positional one - * bumped `Manifolds.jl`to version 0.10; this mainly means that any algorithm working on a productmanifold and requiring `ArrayPartition` now has to explicitly do `using RecursiveArrayTools`. + * bumped `Manifolds.jl` to version 0.10; this mainly means that any algorithm working on a product manifold and requiring `ArrayPartition` now has to explicitly do `using RecursiveArrayTools`. ### Fixed * the `AverageGradientRule` filled its internal vector of gradients wrongly – or mixed it up in parallel transport. This is now fixed. diff --git a/docs/src/plans/objective.md b/docs/src/plans/objective.md index 48d10b2529..2d6796107a 100644 --- a/docs/src/plans/objective.md +++ b/docs/src/plans/objective.md @@ -121,6 +121,8 @@ ManifoldCostGradientObjective ```@docs get_gradient get_gradients +get_residuals +get_residuals! ``` and internally @@ -129,12 +131,6 @@ and internally get_gradient_function ``` -#### Internal helpers - -```@docs -get_gradient_from_Jacobian! -``` - ### Subgradient objective ```@docs @@ -262,6 +258,8 @@ Manopt.FunctionVectorialType #### Access functions ```@docs +Manopt.get_jacobian +Manopt.get_jacobian! Manopt.get_value Manopt.get_value_function Base.length(::VectorGradientFunction) @@ -271,6 +269,9 @@ Base.length(::VectorGradientFunction) ```@docs Manopt._to_iterable_indices +Manopt._change_basis! +Manopt.get_basis +Manopt.get_range ``` ### Subproblem objective diff --git a/docs/src/solvers/ChambollePock.md b/docs/src/solvers/ChambollePock.md index 7c1af4e431..71816e9766 100644 --- a/docs/src/solvers/ChambollePock.md +++ b/docs/src/solvers/ChambollePock.md @@ -116,7 +116,7 @@ The [`ChambollePock`](@ref) solver requires the following functions of a manifol * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` or `retraction_method_dual=` (for ``\mathcal N``) does not have to be specified. * An [`inverse_retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, X, p, q)`; it is recommended to set the [`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. 
If this default is set, a `inverse_retraction_method=` or `inverse_retraction_method_dual=` (for ``\mathcal N``) does not have to be specified. * A [`vector_transport_to!`](@extref ManifoldsBase :doc:`vector_transports`)`M, Y, p, X, q)`; it is recommended to set the [`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `vector_transport_method=` or `vector_transport_method_dual=` (for ``\mathcal N``) does not have to be specified. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. ## Literature diff --git a/docs/src/solvers/DouglasRachford.md b/docs/src/solvers/DouglasRachford.md index f7a04ccff0..165f767472 100644 --- a/docs/src/solvers/DouglasRachford.md +++ b/docs/src/solvers/DouglasRachford.md @@ -74,7 +74,7 @@ The [`DouglasRachford`](@ref) solver requires the following functions of a manif * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` does not have to be specified. * An [`inverse_retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, X, p, q)`; it is recommended to set the [`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `inverse_retraction_method=` does not have to be specified. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. By default, one of the stopping criteria is [`StopWhenChangeLess`](@ref), which requires diff --git a/docs/src/solvers/LevenbergMarquardt.md b/docs/src/solvers/LevenbergMarquardt.md index dfb7295edb..82d7cd38a5 100644 --- a/docs/src/solvers/LevenbergMarquardt.md +++ b/docs/src/solvers/LevenbergMarquardt.md @@ -21,7 +21,7 @@ The [`LevenbergMarquardt`](@ref) solver requires the following functions of a ma * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` does not have to be specified. * the [`norm`](@extref `LinearAlgebra.norm-Tuple{AbstractManifold, Any, Any}`) as well, to stop when the norm of the gradient is small, but if you implemented `inner`, the norm is provided already. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. 
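The following is a small sketch (with a hypothetical manifold type, not part of Manopt.jl) of how such a default retraction can be declared, so that the `retraction_method=` keyword can then be omitted:

```julia
using ManifoldsBase

struct MyManifold <: AbstractManifold{ℝ} end

# declaring a favourite retraction as the default for this manifold type
# lets solvers pick it up automatically
ManifoldsBase.default_retraction_method(::MyManifold) = ProjectionRetraction()
```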
## Literature diff --git a/docs/src/solvers/augmented_Lagrangian_method.md b/docs/src/solvers/augmented_Lagrangian_method.md index 1a830f3a6c..1dfb306694 100644 --- a/docs/src/solvers/augmented_Lagrangian_method.md +++ b/docs/src/solvers/augmented_Lagrangian_method.md @@ -26,7 +26,7 @@ AugmentedLagrangianGrad The [`augmented_Lagrangian_method`](@ref) solver requires the following functions of a manifold to be available -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * Everything the subsolver requires, which by default is the [`quasi_Newton`](@ref) method * A [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. diff --git a/docs/src/solvers/difference_of_convex.md b/docs/src/solvers/difference_of_convex.md index 33082d90b1..37e9a472d1 100644 --- a/docs/src/solvers/difference_of_convex.md +++ b/docs/src/solvers/difference_of_convex.md @@ -67,7 +67,7 @@ which either requires * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` or `retraction_method_dual=` (for ``\mathcal N``) does not have to be specified. * An [`inverse_retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, X, p, q)`; it is recommended to set the [`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `inverse_retraction_method=` or `inverse_retraction_method_dual=` (for ``\mathcal N``) does not have to be specified or the [`distance`](@extref `ManifoldsBase.distance-Tuple{AbstractManifold, Any, Any}`)`(M, p, q)` for said default inverse retraction. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * By default the tangent vector storing the gradient is initialized calling [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. * everything the subsolver requires, which by default is the [`trust_regions`](@ref) or if you do not provide a Hessian [`gradient_descent`](@ref). diff --git a/docs/src/solvers/exact_penalty_method.md b/docs/src/solvers/exact_penalty_method.md index 7237fe4236..4bb3490664 100644 --- a/docs/src/solvers/exact_penalty_method.md +++ b/docs/src/solvers/exact_penalty_method.md @@ -30,7 +30,7 @@ LogarithmicSumOfExponentials The [`exact_penalty_method`](@ref) solver requires the following functions of a manifold to be available -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. 
* Everything the subsolver requires, which by default is the [`quasi_Newton`](@ref) method * A [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. diff --git a/docs/src/solvers/particle_swarm.md b/docs/src/solvers/particle_swarm.md index c804cfe5e4..91b80f9098 100644 --- a/docs/src/solvers/particle_swarm.md +++ b/docs/src/solvers/particle_swarm.md @@ -30,7 +30,7 @@ The [`particle_swarm`](@ref) solver requires the following functions of a manifo * A [`vector_transport_to!`](@extref ManifoldsBase :doc:`vector_transports`)`M, Y, p, X, q)`; it is recommended to set the [`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `vector_transport_method=` does not have to be specified. * By default the stopping criterion uses the [`norm`](@extref `LinearAlgebra.norm-Tuple{AbstractManifold, Any, Any}`) as well, to stop when the norm of the gradient is small, but if you implemented `inner`, the norm is provided already. * Tangent vectors storing the social and cognitive vectors are initialized calling [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * The [`distance`](@extref `ManifoldsBase.distance-Tuple{AbstractManifold, Any, Any}`)`(M, p, q)` when using the default stopping criterion, which uses [`StopWhenChangeLess`](@ref). ## Literature diff --git a/docs/src/solvers/primal_dual_semismooth_Newton.md b/docs/src/solvers/primal_dual_semismooth_Newton.md index e2e0607f79..d570816600 100644 --- a/docs/src/solvers/primal_dual_semismooth_Newton.md +++ b/docs/src/solvers/primal_dual_semismooth_Newton.md @@ -78,7 +78,7 @@ The [`primal_dual_semismooth_Newton`](@ref) solver requires the following functi * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` does not have to be specified. * An [`inverse_retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, X, p, q)`; it is recommended to set the [`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `inverse_retraction_method=` does not have to be specified. * A [`vector_transport_to!`](@extref ManifoldsBase :doc:`vector_transports`)`M, Y, p, X, q)`; it is recommended to set the [`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `vector_transport_method=` does not have to be specified. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. 
* A [`get_basis`](@extref `ManifoldsBase.get_basis-Tuple{AbstractManifold, Any, ManifoldsBase.AbstractBasis}`) for the [`DefaultOrthonormalBasis`](@extref `ManifoldsBase.DefaultOrthonormalBasis`) on ``\mathcal M`` * [`exp`](@extref `Base.exp-Tuple{AbstractManifold, Any, Any}`) and [`log`](@extref `Base.log-Tuple{AbstractManifold, Any, Any}`) (on ``\mathcal M``) * A [`DiagonalizingOrthonormalBasis`](@extref `ManifoldsBase.DiagonalizingOrthonormalBasis`) to compute the differentials of the exponential and logarithmic map diff --git a/docs/src/solvers/truncated_conjugate_gradient_descent.md b/docs/src/solvers/truncated_conjugate_gradient_descent.md index aabcacd116..1a331d7cac 100644 --- a/docs/src/solvers/truncated_conjugate_gradient_descent.md +++ b/docs/src/solvers/truncated_conjugate_gradient_descent.md @@ -51,7 +51,7 @@ The [`trust_regions`](@ref) solver requires the following functions of a manifol * if you do not provide a `trust_region_radius=`, then [`injectivity_radius`](@extref `ManifoldsBase.injectivity_radius-Tuple{AbstractManifold}`) on the manifold `M` is required. * the [`norm`](@extref `LinearAlgebra.norm-Tuple{AbstractManifold, Any, Any}`) as well, to stop when the norm of the gradient is small, but if you implemented `inner`, the norm is provided already. * A [`zero_vector!`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,X,p)`. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. ## Literature diff --git a/docs/src/solvers/trust_regions.md b/docs/src/solvers/trust_regions.md index 14548e373e..755b04a443 100644 --- a/docs/src/solvers/trust_regions.md +++ b/docs/src/solvers/trust_regions.md @@ -66,7 +66,7 @@ The [`trust_regions`](@ref) solver requires the following functions of a manifol * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` does not have to be specified. * By default the stopping criterion uses the [`norm`](@extref `LinearAlgebra.norm-Tuple{AbstractManifold, Any, Any}`) as well, to stop when the norm of the gradient is small, but if you implemented `inner`, the norm is provided already. * if you do not provide an initial `max_trust_region_radius`, a [`manifold_dimension`](@extref `ManifoldsBase.manifold_dimension-Tuple{AbstractManifold}`) is required. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * By default the tangent vectors are initialized calling [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. diff --git a/src/Manopt.jl b/src/Manopt.jl index dfb6417e64..091f0accfc 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -11,7 +11,7 @@ module Manopt import Base: &, copy, getindex, identity, length, setindex!, show, | import LinearAlgebra: reflect! 
import ManifoldsBase: embed!, plot_slope, prepare_check_result, find_best_slope_window -import ManifoldsBase: base_manifold, base_point +import ManifoldsBase: base_manifold, base_point, get_basis using ColorSchemes using ColorTypes using Colors @@ -364,6 +364,8 @@ export get_proximal_map, get_proximal_map! export get_state, get_initial_stepsize, get_iterate, + get_jacobian, + get_jacobian!, get_gradients, get_gradients!, get_manifold, @@ -379,6 +381,8 @@ export get_state, get_differential_dual_prox!, set_gradient!, set_iterate!, + get_residuals, + get_residuals!, linearized_forward_operator, linearized_forward_operator!, adjoint_linearized_operator, diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index e2f88d9483..4d8dbc5cc0 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -50,7 +50,8 @@ end # --- # LaTeX -define!(:LaTeX, :argmin, raw"\operatorname{arg\,min}") +define!(:LaTeX, :abs, (v) -> raw"\lvert " * "$v" * raw" \rvert") +define!(:LaTeX, :argmin, raw"\operatorname*{arg\,min}") define!(:LaTeX, :ast, raw"\ast") define!(:LaTeX, :bar, (letter) -> raw"\bar" * "$(letter)") define!(:LaTeX, :big, raw"\big") @@ -65,11 +66,11 @@ define!( :cases, (c...) -> raw"\begin{cases}" * - "\n" * - "$(join([" $(ci)" for ci in c], raw"\\\\"*"\n"))" * - "\n" * + "$(join([" $(ci)" for ci in c], raw"\\\\ "))" * raw"\end{cases}", ) +define!(:LaTeX, :cdots, raw"\cdots") +define!(:LaTeX, :ddots, raw"\ddots") define!(:LaTeX, :deriv, (t = "t") -> raw"\frac{\mathrm{d}}{\mathrm{d}" * "$(t)" * "}") define!(:LaTeX, :displaystyle, raw"\displaystyle") define!(:LaTeX, :frac, (a, b) -> raw"\frac" * "{$a}{$b}") @@ -81,13 +82,22 @@ define!(:LaTeX, :log, raw"\log") define!(:LaTeX, :max, raw"\max") define!(:LaTeX, :min, raw"\min") define!(:LaTeX, :norm, (v; index = "") -> raw"\lVert " * "$v" * raw" \rVert" * "_{$index}") +define!( + :LaTeX, + :pmatrix, + (lines...) -> raw"\begin{pmatrix} " * join(lines, raw"\\ ") * raw"\end{pmatrix}", +) define!(:LaTeX, :prox, raw"\operatorname{prox}") define!(:LaTeX, :quad, raw"\quad") define!(:LaTeX, :reflect, raw"\operatorname{refl}") define!(:LaTeX, :retr, raw"\operatorname{retr}") +define!(:LaTeX, :rm, (letter) -> raw"\mathrm{" * "$letter" * "}") +define!(:LaTeX, :sqrt, (s) -> raw"\sqrt{" * "$s}") define!(:LaTeX, :subgrad, raw"∂") define!(:LaTeX, :sum, raw"\sum") define!(:LaTeX, :text, (letter) -> raw"\text{" * "$letter" * "}") +define!(:LaTeX, :transp, raw"\mathrm{T}") +define!(:LaTeX, :vdots, raw"\vdots") define!(:LaTeX, :vert, raw"\vert") define!(:LaTeX, :widehat, (letter) -> raw"\widehat{" * "$letter" * "}") _tex(args...; kwargs...) = glossary(:LaTeX, args...; kwargs...) @@ -99,7 +109,8 @@ _tex(args...; kwargs...) = glossary(:LaTeX, args...; kwargs...) define!(:Math, :distance, raw"\mathrm{d}") define!(:Math, :M, (; M="M") -> _math(:Manifold, :symbol; M=M)) define!(:Math, :Manifold, :symbol, (; M="M") -> _tex(:Cal, M)) define!(:Math, :Manifold, :description, "the Riemannian manifold") define!(:Math, :Iterate, (; p="p", k="k") -> "$(p)^{($(k))}") define!( :Math, @@ -231,25 +242,39 @@ define!( # # # Problems +_problem(args...; kwargs...) = glossary(:Problem, args...; kwargs...) 
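# A small usage sketch (illustrative only, not part of the glossary itself):
# the helpers above compose plain LaTeX strings, so one would for example expect
#   _tex(:abs, "f_i(p)")      == raw"\lvert f_i(p) \rvert"
#   _tex(:pmatrix, "a", "b")  == raw"\begin{pmatrix} a\\ b\end{pmatrix}"
#   _tex(:sqrt, "2")          == raw"\sqrt{2}"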
+ define!( :Problem, :Constrained, (; M="M", p="p") -> """ ```math \\begin{aligned} -\\min_{$p ∈ $(_tex(:Cal, M))} & f($p)\\\\ +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} & f($p)\\\\ $(_tex(:text, "subject to"))$(_tex(:quad))&g_i($p) ≤ 0 \\quad $(_tex(:text, " for ")) i= 1, …, m,\\\\ \\quad & h_j($p)=0 \\quad $(_tex(:text, " for ")) j=1,…,n, \\end{aligned} ``` """, ) +define!(:Problem, :Default, (; M="M", p="p") -> """ + ```math + $(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} f($p) + ``` + """) define!( :Problem, - :Default, - (; M="M", p="p") -> "\n```math\n$(_tex(:argmin))_{$p ∈ $(_tex(:Cal, M))} f($p)\n```\n", + :NonLinearLeastSquares, + (; M="M", p="p") -> """ +```math +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2)) $(_tex(:sum))_{i=1}^m $(_tex(:abs, "f_i($p)"))^2 +``` + +where ``f: $(_math(:M; M=M)) → ℝ^m`` is written with component functions ``f_i: $(_math(:M; M=M)) → ℝ``, ``i=1,…,m``, +and each component function is continuously differentiable. +""", ) -_problem(args...; kwargs...) = glossary(:Problem, args...; kwargs...) + # # # Stopping Criteria diff --git a/src/plans/constrained_plan.jl b/src/plans/constrained_plan.jl index 6edba426a3..875d12efbb 100644 --- a/src/plans/constrained_plan.jl +++ b/src/plans/constrained_plan.jl @@ -332,7 +332,7 @@ components gradients, for example In another interpretation, this can be considered a point on the tangent space at ``P = (p,…,p) \in \mathcal M^m``, so in the tangent space to the [`PowerManifold`](@extref `ManifoldsBase.PowerManifold`) ``\mathcal M^m``. -The case where this is a [`NestedPowerRepresentation`](@extref) this agrees with the +In the case where this is a [`NestedPowerRepresentation`](@extref `ManifoldsBase.NestedPowerRepresentation`), this agrees with the interpretation from before, but on power manifolds, more efficient representations exist. To then access the elements, the range has to be specified. That is what this diff --git a/src/plans/manifold_default_factory.jl b/src/plans/manifold_default_factory.jl index 30f737525f..9fcb3131f7 100644 --- a/src/plans/manifold_default_factory.jl +++ b/src/plans/manifold_default_factory.jl @@ -7,9 +7,9 @@ decission on which manifold to use to a later point For now this is established for -* [`DirectionUpdateRule`](@ref)s (TODO: WIP) -* [`Stepsize`](@ref) (TODO: WIP) -* [`StoppingCriterion`](@ref) (TODO:WIP) +* [`DirectionUpdateRule`](@ref)s +* [`Stepsize`](@ref) +* [`StoppingCriterion`](@ref) This factory stores necessary and optional parameters as well as keyword arguments provided by the user to later produce the type this factory is for. diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 007353d529..c7bfb7a697 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -1,132 +1,184 @@ @doc """ - NonlinearLeastSquaresObjective{T<:AbstractEvaluationType} <: AbstractManifoldObjective{T} + NonlinearLeastSquaresObjective{E<:AbstractEvaluationType} <: AbstractManifoldObjective{E} -A type for nonlinear least squares problems. -`T` is a [`AbstractEvaluationType`](@ref) for the `F` and Jacobian functions. +An objective to model the nonlinear least squares problem + +$(_problem(:NonLinearLeastSquares)) Specify a nonlinear least squares problem # Fields -* `f` a function ``f: $(_math(:M)) → ℝ^d`` to minimize -* `jacobian!!` Jacobian of the function ``f`` -* `jacobian_tangent_basis` the basis of tangent space used for computing the Jacobian. 
-* `num_components` number of values returned by `f` (equal to `d`). -Depending on the [`AbstractEvaluationType`](@ref) `T` the function ``F`` has to be provided: +* `objective`: an [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of + cost functions ``f_i`` (or a function returning a vector of costs) as well as their + gradients ``$(_tex(:grad)) f_i`` (or Jacobian of the vector-valued function). -* as a functions `(M::AbstractManifold, p) -> v` that allocates memory for `v` itself for - an [`AllocatingEvaluation`](@ref), -* as a function `(M::AbstractManifold, v, p) -> v` that works in place of `v` for a - [`InplaceEvaluation`](@ref). +This `NonlinearLeastSquaresObjective` then has the same [`AbstractEvaluationType`](@ref) `E` +as the (inner) `objective`. -Also the Jacobian ``jacF!!`` is required: # Constructors -* as a functions `(M::AbstractManifold, p; basis_domain::AbstractBasis) -> v` that allocates - memory for `v` itself for an [`AllocatingEvaluation`](@ref), -* as a function `(M::AbstractManifold, v, p; basis_domain::AbstractBasis) -> v` that works - in place of `v` for an [`InplaceEvaluation`](@ref). + NonlinearLeastSquaresObjective(f, jacobian, range_dimension::Integer; kwargs...) + NonlinearLeastSquaresObjective(vf::AbstractVectorGradientFunction) -# Constructors +# Arguments + +* `f` the vectorial cost function ``f: $(_math(:M)) → ℝ^m`` +* `jacobian` the Jacobian, might also be a vector of gradients of the component functions of `f` +* `range_dimension::Integer` the number of dimensions `m` the function `f` maps into + +These three can also be passed as an [`AbstractVectorGradientFunction`](@ref) `vf` directly. - NonlinearLeastSquaresProblem(M, F, jacF, num_components; evaluation=AllocatingEvaluation(), jacobian_tangent_basis=DefaultOrthonormalBasis()) +# Keyword arguments + +$(_var(:Keyword, :evaluation)) +* `function_type::`[`AbstractVectorialType`](@ref)`=`[`FunctionVectorialType`](@ref)`()`: specify + the format the residuals are given in. By default a function returning a vector. +* `jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis()`: a shortcut to specify + the basis the Jacobian matrix is built with. +* `jacobian_type::`[`AbstractVectorialType`](@ref)`=`[`CoordinateVectorialType`](@ref)`(jacobian_tangent_basis)`: + specify the format the Jacobian is given in. By default a matrix of the differential with + respect to a certain basis of the tangent space. 
# See also [`LevenbergMarquardt`](@ref), [`LevenbergMarquardtState`](@ref) """ -struct NonlinearLeastSquaresObjective{E<:AbstractEvaluationType,TC,TJ,TB<:AbstractBasis} <: - AbstractManifoldGradientObjective{E,TC,TJ} - f::TC - jacobian!!::TJ - jacobian_tangent_basis::TB - num_components::Int +struct NonlinearLeastSquaresObjective{ + E<:AbstractEvaluationType,F<:AbstractVectorGradientFunction{E} +} <: AbstractManifoldGradientObjective{E,F,F} + objective::F end + function NonlinearLeastSquaresObjective( - f::TF, - jacobian_f::TJ, - num_components::Int; + f, + jacobian, + range_dimension::Integer; evaluation::AbstractEvaluationType=AllocatingEvaluation(), - jacobian_tangent_basis::TB=DefaultOrthonormalBasis(), -) where {TF,TJ,TB<:AbstractBasis} - return NonlinearLeastSquaresObjective{typeof(evaluation),TF,TJ,TB}( - f, jacobian_f, jacobian_tangent_basis, num_components + jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis(), + jacobian_type::AbstractVectorialType=CoordinateVectorialType(jacobian_tangent_basis), + function_type::AbstractVectorialType=FunctionVectorialType(), + kwargs..., +) + vgf = VectorGradientFunction( + f, + jacobian, + range_dimension; + evaluation=evaluation, + jacobian_type=jacobian_type, + function_type=function_type, ) + return NonlinearLeastSquaresObjective(vgf; kwargs...) end +# Cost function get_cost( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{AllocatingEvaluation}, p -) - return 1//2 * norm(nlso.f(M, p))^2 + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E,<:AbstractVectorFunction{E,<:ComponentVectorialType} + }, + p; + kwargs..., +) where {E<:AbstractEvaluationType} + v = 0.0 + for i in 1:length(nlso.objective) + v += abs(get_value(M, nlso.objective, p, i))^2 + end + v /= 2 + return v end function get_cost( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p -) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - return 1//2 * norm(residual_values)^2 + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E,<:AbstractVectorFunction{E,<:FunctionVectorialType} + }, + p; + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType} + return sum(abs2, value_cache) / 2 end -""" - get_gradient_from_Jacobian!( - M::AbstractManifold, - X, - nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, - p, - Jval=zeros(nlso.num_components, manifold_dimension(M)), - ) - -Compute gradient of [`NonlinearLeastSquaresObjective`](@ref) `nlso` at point `p` in place of -`X`, with temporary Jacobian stored in the optional argument `Jval`. -""" -function get_gradient_from_Jacobian!( - M::AbstractManifold, - X, - nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, - p, - Jval=zeros(nlso.num_components, manifold_dimension(M)), +function get_jacobian( + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - basis_p = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - nlso.jacobian!!(M, Jval, p; basis_domain=basis_p) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - get_vector!(M, X, p, transpose(Jval) * residual_values, basis_p) - return X + J = zeros(length(nlso.objective), manifold_dimension(M)) + get_jacobian!(M, J, nlso, p; kwargs...) 
+ return J end - -function get_gradient( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{AllocatingEvaluation}, p +# The jacobian is now just a pass-through +function get_jacobian!( + M::AbstractManifold, J, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = nlso.jacobian!!(M, p; basis_domain=basis_x) - residual_values = nlso.f(M, p) - return get_vector(M, p, transpose(Jval) * residual_values, basis_x) + get_jacobian!(M, J, nlso.objective, p; kwargs...) + return J end function get_gradient( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = zeros(nlso.num_components, manifold_dimension(M)) - nlso.jacobian!!(M, Jval, p; basis_domain=basis_x) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - return get_vector(M, p, transpose(Jval) * residual_values, basis_x) + X = zero_vector(M, p) + return get_gradient!(M, X, nlso, p; kwargs...) end - function get_gradient!( - M::AbstractManifold, X, nlso::NonlinearLeastSquaresObjective{AllocatingEvaluation}, p + M::AbstractManifold, + X, + nlso::NonlinearLeastSquaresObjective, + p; + basis=get_basis(nlso.objective.jacobian_type), + jacobian_cache=get_jacobian(M, nlso, p; basis=basis), + value_cache=get_residuals(M, nlso, p), ) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = nlso.jacobian!!(M, p; basis_domain=basis_x) - residual_values = nlso.f(M, p) - return get_vector!(M, X, p, transpose(Jval) * residual_values, basis_x) + return get_vector!(M, X, p, transpose(jacobian_cache) * value_cache, basis) end -function get_gradient!( - M::AbstractManifold, X, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p +# +# +# --- Residuals +_doc_get_residuals_nlso = """ + get_residuals(M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p) + get_residuals!(M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective, p) + +Compute the vector of residuals ``f_i(p)``, ``i=1,…,m`` given the manifold `M`, +the [`NonlinearLeastSquaresObjective`](@ref) `nlso` and a current point ``p`` on `M`. +""" + +@doc "$(_doc_get_residuals_nlso)" +get_residuals(M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs...) + +function get_residuals( + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - get_gradient_from_Jacobian!(M, X, nlso, p) - return X + V = zeros(length(nlso.objective)) + return get_residuals!(M, V, nlso, p; kwargs...) +end + +@doc "$(_doc_get_residuals_nlso)" +get_residuals!(M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective, p; kwargs...) 
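# Relation to the cost (a sketch, for some hypothetical `nlso` and point `p` on `M`):
# since the cost is half the squared norm of the residual vector, one would expect
#   get_cost(M, nlso, p) ≈ sum(abs2, get_residuals(M, nlso, p)) / 2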
+ +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,<:AbstractVectorFunction{E,<:ComponentVectorialType} + }, + p; + kwargs..., +) where {E<:AbstractEvaluationType} + for i in 1:length(nlso.objective) + V[i] = get_value(M, nlso.objective, p, i) + end + return V +end +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,<:AbstractVectorFunction{E,<:FunctionVectorialType} + }, + p, +) where {E<:AbstractEvaluationType} + get_value!(M, V, nlso.objective, p) + return V end @doc """ @@ -143,7 +195,7 @@ $(_var(:Field, :retraction_method)) * `residual_values`: value of ``F`` calculated in the solver setup or the previous iteration * `residual_values_temp`: value of ``F`` for the current proposal point $(_var(:Field, :stopping_criterion, "stop")) -* `jacF`: the current Jacobian of ``F`` +* `jacobian`: the current Jacobian of ``F`` * `gradient`: the current gradient of ``F`` * `step_vector`: the tangent vector at `x` that is used to move to the next point * `last_stepsize`: length of `step_vector` @@ -158,7 +210,7 @@ $(_var(:Field, :stopping_criterion, "stop")) # Constructor - LevenbergMarquardtState(M, initial_residual_values, initial_jacF; kwargs...) + LevenbergMarquardtState(M, initial_residual_values, initial_jacobian; kwargs...) Generate the Levenberg-Marquardt solver state. @@ -192,7 +244,7 @@ mutable struct LevenbergMarquardtState{ retraction_method::TRTM residual_values::Tresidual_values candidate_residual_values::Tresidual_values - jacF::TJac + jacobian::TJac X::TGrad step_vector::TGrad last_stepsize::Tparams @@ -205,7 +257,7 @@ mutable struct LevenbergMarquardtState{ function LevenbergMarquardtState( M::AbstractManifold, initial_residual_values::Tresidual_values, - initial_jacF::TJac; + initial_jacobian::TJac; p::P=rand(M), X::TGrad=zero_vector(M, p), stopping_criterion::StoppingCriterion=StopAfterIteration(200) | @@ -245,7 +297,7 @@ mutable struct LevenbergMarquardtState{ retraction_method, initial_residual_values, copy(initial_residual_values), - initial_jacF, + initial_jacobian, X, allocate(M, X), zero(Tparams), diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 21f2eb8f10..04c5a713a6 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -1,8 +1,8 @@ -@doc raw""" +@doc """ AbstractVectorialType An abstract type for different representations of a vectorial function - ``f: \mathcal M → \mathbb R^m`` and its (component-wise) gradient/Jacobian +``f: $(_math(:M)) → ℝ^m`` and its (component-wise) gradient/Jacobian """ abstract type AbstractVectorialType end @@ -10,22 +10,40 @@ abstract type AbstractVectorialType end CoordinateVectorialType{B<:AbstractBasis} <: AbstractVectorialType A type to indicate that gradient of the constraints is implemented as a -Jacobian matrix with respect to a certain basis, that is if the constraints are -given as ``g: \mathcal M → ℝ^m`` with respect to a basis ``\mathcal B`` of ``T_p\mathcal M``, at ``p∈ \mathcal M`` +Jacobian matrix with respect to a certain basis, that is if the vector function +is ``f: \mathcal M → ℝ^m`` and we have a basis ``\mathcal B`` of ``T_p\mathcal M``, at ``p∈ \mathcal M`` This can be written as ``J_g(p) = (c_1^{\mathrm{T}},…,c_m^{\mathrm{T}})^{\mathrm{T}} \in ℝ^{m,d}``, that is, every row ``c_i`` of this matrix is a set of coefficients such that `get_vector(M, p, c, B)` is the tangent vector ``\operatorname{grad} g_i(p)`` - for example ``g_i(p) ∈ ℝ^m`` or ``\operatorname{grad} g_i(p) ∈ 
T_p\mathcal M``, ``i=1,…,m``. # Fields -* `basis` an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) to indicate the default representation. +* `basis` an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) to indicate the basis + in which the Jacobian is expressed. + +# Constructor + + CoordinateVectorialType(basis=DefaultOrthonormalBasis()) """ struct CoordinateVectorialType{B<:AbstractBasis} <: AbstractVectorialType basis::B end +CoordinateVectorialType() = CoordinateVectorialType(DefaultOrthonormalBasis()) +""" + get_basis(::AbstractVectorialType) + +Return a basis that fits a vector function representation. + +For the case where some vectorial data is stored with respect to a basis, +this function returns the corresponding basis, most prominently for the [`CoordinateVectorialType`](@ref). + +If a type does not store data with respect to a certain basis, the [`DefaultOrthonormalBasis`](@extref `ManifoldsBase.DefaultOrthonormalBasis`) +is returned. +""" +get_basis(::AbstractVectorialType) = DefaultOrthonormalBasis() +get_basis(cvt::CoordinateVectorialType) = cvt.basis """ _to_iterable_indices(A::AbstractVector, i) @@ -51,12 +69,31 @@ for example ``g_i(p) ∈ ℝ^m`` or ``\operatorname{grad} g_i(p) ∈ T_p\mathcal struct ComponentVectorialType <: AbstractVectorialType end @doc raw""" - FunctionVectorialType <: AbstractVectorialType + FunctionVectorialType{P<:AbstractPowerRepresentation} <: AbstractVectorialType A type to indicate that constraints are implemented as one whole function, for example ``g(p) ∈ ℝ^m`` or ``\operatorname{grad} g(p) ∈ (T_p\mathcal M)^m``. + +This type internally stores the [`AbstractPowerRepresentation`](@extref `ManifoldsBase.AbstractPowerRepresentation`), +when it makes sense, especially for Hessian and gradient functions. """ -struct FunctionVectorialType <: AbstractVectorialType end +struct FunctionVectorialType{P<:AbstractPowerRepresentation} <: AbstractVectorialType + range::P +end + +""" + get_range(::AbstractVectorialType) + +Return an abstract power manifold representation that fits a vector function's range. +Most prominently a [`FunctionVectorialType`](@ref) returns its internal range. + +Otherwise the default [`NestedPowerRepresentation`](@extref `ManifoldsBase.NestedPowerRepresentation`)`()` is used to work on +a vector of data. +""" +get_range(vt::FunctionVectorialType) = vt.range +get_range(::AbstractVectorialType) = NestedPowerRepresentation() + +FunctionVectorialType() = FunctionVectorialType(NestedPowerRepresentation()) @doc raw""" AbstractVectorFunction{E, FT} <: Function ...format ``f`` is implemented as. There are three different representations of ``f``, which might be beneficial in one or the other situation: -* the [`FunctionVectorialType`](@ref), -* the [`ComponentVectorialType`](@ref), -* the [`CoordinateVectorialType`](@ref) with respect to a specific basis of the tangent space. +* the [`FunctionVectorialType`](@ref) storing a single function ``f`` that returns a vector, +* the [`ComponentVectorialType`](@ref) storing a vector of functions ``f_i`` that return a single value each, +* the [`CoordinateVectorialType`](@ref) storing functions with respect to a specific basis of the tangent space for gradients and Hessians. + Gradients of this type are usually referred to as Jacobians. 
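As a small sketch for ``f: \mathcal M → ℝ^2`` (with illustrative names `f1`, `f2`), the first two representations would look like

```julia
f(M, p) = [f1(p), f2(p)]                 # FunctionVectorialType: one function returning a vector
fs = [(M, p) -> f1(p), (M, p) -> f2(p)]  # ComponentVectorialType: a vector of single-valued functions
```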
For the [`ComponentVectorialType`](@ref) imagine that ``f`` could also be written using its component functions, @@ -90,9 +128,6 @@ For the [`FunctionVectorialType`](@ref) ``f`` is implemented as a single functi `f(M, p)`, that returns an `AbstractArray`. An advantage here is that this is a single function. A disadvantage might be, that if this is expensive even to compute a single component, all of `f` has to be evaluated - -For the [`ComponentVectorialType`](@ref) of `f`, each of the component functions -is a (classical) objective. """ abstract type AbstractVectorFunction{E<:AbstractEvaluationType,FT<:AbstractVectorialType} <: Function end @@ -110,29 +145,29 @@ abstract type AbstractVectorGradientFunction{ E<:AbstractEvaluationType,FT<:AbstractVectorialType,JT<:AbstractVectorialType } <: AbstractVectorFunction{E,FT} end -@doc raw""" +@doc """ VectorGradientFunction{E, FT, JT, F, J, I} <: AbstractVectorGradientFunction{E, FT, JT} -Represent a function ``f:\mathcal M → ℝ^n`` including it first derivative, +Represent a function ``f:$(_math(:M)) → ℝ^n`` including its first derivative, either as a vector of gradients or a Jacobian -And hence has a gradient ``\oepratorname{grad} f_i(p) ∈ T_p\mathcal M``. +And hence has a gradient ``$(_tex(:grad)) f_i(p) ∈ $(_math(:TpM))``. Putting these gradients into a vector the same way as the functions, yields a [`ComponentVectorialType`](@ref) ```math -\operatorname{grad} f(p) = \Bigl( \operatorname{grad} f_1(p), \operatorname{grad} f_2(p), …, \operatorname{grad} f_n(p) \Bigr)^{\mathrm{T}} -∈ (T_p\mathcal M)^n +$(_tex(:grad)) f(p) = $(_tex(:Bigl))( $(_tex(:grad)) f_1(p), $(_tex(:grad)) f_2(p), …, $(_tex(:grad)) f_n(p) $(_tex(:Bigr)))^$(_tex(:transp)) +∈ ($(_math(:TpM)))^n ``` An advantage here is that again the single components can be evaluated individually # Fields -* `value!!`: the cost function ``f``, which can take different formats -* `cost_type`: indicating / string data for the type of `f` -* `jacobian!!`: the Jacobian of ``f`` -* `jacobian_type`: indicating / storing data for the type of ``J_f`` +* `value!!::F`: the cost function ``f``, which can take different formats +* `cost_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of `f` +* `jacobian!!::J`: the Jacobian of ``f`` +* `jacobian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``J_f`` * `parameters`: the number `n`, that is, the size of the vector ``f`` returns. # Constructor @@ -184,41 +219,43 @@ function VectorGradientFunction( ) end -@doc raw""" +_doc_vhf = """ VectorHessianFunction{E, FT, JT, HT, F, J, H, I} <: AbstractVectorGradientFunction{E, FT, JT} -Represent a function ``f:\mathcal M → ℝ^n`` including it first derivative, +Represent a function ``f:$(_math(:M)) → ℝ^n`` including its first derivative, either as a vector of gradients or a Jacobian, and the Hessian, as a vector of Hessians of the component functions. Both the Jacobian and the Hessian can map into either a sequence of tangent spaces -or a single tangent space of the power manifold of lenth `n`. +or a single tangent space of the power manifold of length `n`. 
# Fields -* `value!!`: the cost function ``f``, which can take different formats -* `cost_type`: indicating / string data for the type of `f` -* `jacobian!!`: the Jacobian of ``f`` -* `jacobian_type`: indicating / storing data for the type of ``J_f`` -* `hessians!!`: the Hessians of ``f`` (in a component wise sense) -* `hessian_type`: indicating / storing data for the type of ``H_f`` -* `parameters`: the number `n` from, the size of the vector ``f`` returns. +* `value!!::F`: the cost function ``f``, which can take different formats +* `cost_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of `f` +* `jacobian!!::J`: the Jacobian ``J_f`` of ``f`` +* `jacobian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``J_f`` +* `hessians!!::H`: the Hessians of ``f`` (in a component wise sense) +* `hessian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``H_f`` +* `range_dimension`: the number `n`, that is, the size of the vector ``f`` returns. # Constructor - VectorGradientFunction(f, Jf, Hess_f, range_dimension; + VectorHessianFunction(f, Jf, Hess_f, range_dimension; evaluation::AbstractEvaluationType=AllocatingEvaluation(), function_type::AbstractVectorialType=FunctionVectorialType(), jacobian_type::AbstractVectorialType=FunctionVectorialType(), hessian_type::AbstractVectorialType=FunctionVectorialType(), ) -Create a `VectorGradientFunction` of `f` and its Jacobian (vector of gradients) `Jf` +Create a `VectorHessianFunction` of `f` and its Jacobian (vector of gradients) `Jf` and (vector of) Hessians, where `f` maps into the Euclidean space of dimension `range_dimension`. Their types are specified by the `function_type`, and `jacobian_type`, and `hessian_type`, respectively. The Jacobian and Hessian can further be given as an allocating variant or an inplace-variant, specified by the `evaluation=` keyword. """ + +@doc "$(_doc_vhf)" struct VectorHessianFunction{ E<:AbstractEvaluationType, FT<:AbstractVectorialType, @@ -262,54 +299,517 @@ function VectorHessianFunction( ) end -@doc raw""" - get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p[, i=:]) +_vgf_index_to_length(b::BitVector, n) = sum(b) +_vgf_index_to_length(::Colon, n) = n +_vgf_index_to_length(i::AbstractArray{<:Integer}, n) = length(i) +_vgf_index_to_length(r::UnitRange{<:Integer}, n) = length(r) -Evaluate the vector function [`VectorGradientFunction`](@ref) `vgf` at `p`. -The `range` can be used to specify a potential range, but is currently only present for consistency. +# +# +# ---- Hessian +@doc raw""" + get_hessian(M::AbstractManifold, vhf::VectorHessianFunction, p, X, i) + get_hessian(M::AbstractManifold, vhf::VectorHessianFunction, p, X, i, range) + get_hessian!(M::AbstractManifold, Y, vhf::VectorHessianFunction, p, X, i) + get_hessian!(M::AbstractManifold, Y, vhf::VectorHessianFunction, p, X, i, range) -The `i` can be a linear index, you can provide +Evaluate the Hessians of the vector function `vhf` on the manifold `M` at `p` in direction `X`, +where the optional `range` specifies the power manifold representation of the resulting Hessian evaluations. 
+Since `i` is assumed to be a linear index, you can provide * a single integer * a `UnitRange` to specify a range to be returned like `1:3` * a `BitVector` specifying a selection * a `AbstractVector{<:Integer}` to specify indices -* `:` to return the vector of all gradients, which is also the default +* `:` to return the vector of all Hessian evaluations """ get_hessian( M::AbstractManifold, vgf::VectorHessianFunction, p, X, i, range::Union{AbstractPowerRepresentation,Nothing}=nothing, ) # Generic case, allocate (a) a single tangent vector function get_hessian( M::AbstractManifold, vhf::VectorHessianFunction, p, X, i::Integer, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) + Y = zero_vector(M, p) + return get_hessian!(M, Y, vhf, p, X, i, range) end # (b) UnitRange and AbstractVector allow to use length for BitVector its sum function get_hessian( M::AbstractManifold, vhf::VectorHessianFunction, p, X, i=:, # as long as the length can be found it should work, see _vgf_index_to_length range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) + n = _vgf_index_to_length(i, vhf.range_dimension) + pM = PowerManifold(M, range, n) + P = fill(p, pM) + Y = zero_vector(pM, P) + return get_hessian!(M, Y, vhf, p, X, i, range) end + +# +# +# Part I: allocation +# I (a) a vector of functions function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, p, X, i::Integer, ::Union{AbstractPowerRepresentation,Nothing}=nothing, ) where {FT,JT} + return copyto!(M, Y, p, vhf.hessians!![i](M, p, X)) end function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, p, X, i, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} + n = _vgf_index_to_length(i, vhf.range_dimension) + pM = PowerManifold(M, range, n) + rep_size = representation_size(M) + # In the resulting `Y` the indices are linear, + # in `hessians!![i]` the functions are ordered in the same linear sense + for (j, f) in zip(1:n, vhf.hessians!![i]) + copyto!(M, _write(pM, rep_size, Y, (j,)), f(M, p, X)) + end + return Y end function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, p, X, i::Colon, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} + n = _vgf_index_to_length(i, vhf.range_dimension) + pM = PowerManifold(M, range, n) + rep_size = representation_size(M) + for (j, f) in enumerate(vhf.hessians!!) 
+ copyto!(M, _write(pM, rep_size, Y, (j,)), p, f(M, p, X)) + end + return Y end # Part I(c) A single Hessian function function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:FunctionVectorialType}, p, X, i, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} + n = _vgf_index_to_length(i, vhf.range_dimension) + mP = PowerManifold(M, range, n) + copyto!(mP, Y, vhf.hessians!!(M, p, X)[mP, i]) + return Y end function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:FunctionVectorialType}, p, X, i::Integer, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} + mP = PowerManifold(M, range, vhf.range_dimension) + copyto!(M, Y, p, vhf.hessians!!(M, p, X)[mP, i]) + return Y end # # # Part II: in-place evaluations # (a) a vector of functions function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:ComponentVectorialType}, p, X, i::Integer, ::Union{AbstractPowerRepresentation,Nothing}=nothing, ) where {FT,JT} + return vhf.hessians!![i](M, Y, p, X) end function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:ComponentVectorialType}, p, X, i, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} + n = _vgf_index_to_length(i, vhf.range_dimension) + pM = PowerManifold(M, range, n) + rep_size = representation_size(M) + # In the resulting Y the indices are linear, + # in hessians!![i] the functions are also ordered in a linear sense + for (j, f) in zip(1:n, vhf.hessians!![i]) + f(M, _write(pM, rep_size, Y, (j,)), p, X) + end + return Y end # II(b) a single function function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:FunctionVectorialType}, p, X, i::Integer, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} + pM = PowerManifold(M, range, vhf.range_dimension...) + P = fill(p, pM) + y = zero_vector(pM, P) + vhf.hessians!!(M, y, p, X) + copyto!(M, Y, p, y[pM, i]) + return Y end function get_hessian!( M::AbstractManifold, Y, vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:FunctionVectorialType}, p, X, i, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} + # Single access through one whole function is a bit expensive + n = _vgf_index_to_length(i, vhf.range_dimension) + pM_out = PowerManifold(M, range, n) + pM_temp = PowerManifold(M, range, vhf.range_dimension) + P = fill(p, pM_temp) + y = zero_vector(pM_temp, P) + vhf.hessians!!(M, y, p, X) + # Luckily all documented access functions work directly on `x[pM_temp,...]` + copyto!(pM_out, Y, P[pM_temp, i], y[pM_temp, i]) + return Y end get_hessian_function(vgf::VectorHessianFunction, recursive::Bool=false) = vgf.hessians!! 
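# Usage sketch (for some hypothetical `vhf::VectorHessianFunction` on `M`):
#   Y = get_hessian(M, vhf, p, X, 1)   # Hessian of the first component, a tangent vector at p
#   Ys = get_hessian(M, vhf, p, X, :)  # all Hessian evaluations, on the range power manifold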
+ +# +# +# --- Jacobian + +# A small helper function to change the basis of a Jacobian """ + _change_basis!(M::AbstractManifold, JF, p, from_basis::B1, to_basis::B2; X=zero_vector(M,p)) + +Given a Jacobian matrix `JF` at a point `p` on a manifold `M`, whose rows are coordinates +with respect to `from_basis` in the tangent space at `p`, change the basis of the Jacobian to `to_basis` in place of `JF`. + +# Keyword Arguments +* `X` a temporary vector to store a generated vector, before decomposing it again with respect to the new basis +""" +function _change_basis!( + M, JF, p, from_basis::B1, to_basis::B2; X=zero_vector(M, p) +) where {B1<:AbstractBasis,B2<:AbstractBasis} + # change every row to new basis + for i in 1:size(JF, 1) # every row + get_vector!(M, X, p, JF[i, :], from_basis) + # write into a view so the new coordinates actually land in place of `JF` + get_coordinates!(M, view(JF, i, :), p, X, to_basis) + end + return JF +end +# case we have the same basis: nothing to do, just return JF +function _change_basis!( + M, JF, p, from_basis::B, to_basis::B; kwargs... +) where {B<:AbstractBasis} + return JF +end +_doc_get_jacobian_vgf = """ + get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p; kwargs...) + get_jacobian!(M::AbstractManifold, J, vgf::AbstractVectorGradientFunction, p; kwargs...) + +Compute the Jacobian ``J_F ∈ ℝ^{m×n}`` of the [`AbstractVectorGradientFunction`](@ref) ``F`` at `p` on the manifold `M`. + +There are two interpretations of the Jacobian of a vectorial function ``F: $(_math(:M)) → ℝ^m`` on a manifold. +Both depend on choosing a basis on the tangent space ``$(_math(:TpM))`` which we denote by +``Y_1,…,Y_n``, where `n` is the $(_link(:manifold_dimension))`(M)`. +We can write any tangent vector ``X = $(_tex(:displaystyle))$(_tex(:sum))_i c_iY_i`` + +1. The Jacobian ``J_F`` is the matrix with respect to the basis ``Y_1,…,Y_n`` such that + for any ``X∈$(_math(:TpM))`` with coordinate vector ``c`` the differential satisfies ``DF(p)[X] = J_Fc``. + In other words, the ``j``th column of ``J_F`` is given by ``DF(p)[Y_j]`` + +2. Given the gradients ``$(_tex(:grad)) F_i(p)`` of the component functions ``F_i: $(_math(:M)) → ℝ``, + we define the Jacobian function as + + ````math + J(X) = $(_tex(:pmatrix, "⟨$(_tex(:grad)) F_1, X⟩_p", "⟨$(_tex(:grad)) F_2, X⟩_p", "⋮", "⟨$(_tex(:grad)) F_m, X⟩_p")) + ```` + + Then either the ``j``th column of ``J_F`` is given by ``J(Y_j)`` or the ``i``th row is given by all inner products ``⟨$(_tex(:grad)) F_i, Y_j⟩_p`` of the ``i``th gradient function with all basis vectors ``Y_j``. + +The computation can also be done in place of `J`. + +# Keyword arguments + +* `basis::AbstractBasis = `[`get_basis`](@ref)`(vgf.jacobian_type)` + for the [`CoordinateVectorialType`](@ref) of the vectorial function's gradient, this + might lead to a change of basis, if this basis and the one the coordinates are given in do not agree. +* `range::AbstractPowerRepresentation = `[`get_range`](@ref)`(vgf.jacobian_type)` + specify the range of the gradients in the case of a [`FunctionVectorialType`](@ref), + that is, on which type of power manifold the gradient is given. +""" + +@doc "$(_doc_get_jacobian_vgf)" get_jacobian(::AbstractManifold, ::AbstractVectorGradientFunction, p; kwargs...) 
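# Consistency sketch (hypothetical `vgf` with m components on a d-dimensional `M`,
# with an orthonormal basis B): row i of J holds the coordinates of grad F_i, so
#   J = get_jacobian(M, vgf, p; basis=B)   # an m×d matrix
#   c = get_coordinates(M, p, X, B)
#   J * c                                  # ≈ the differential DF(p)[X] ∈ ℝ^m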
+
+# Part I: allocating vgf – allocating jacobian
+# (a) We have a single gradient function
+function get_jacobian(
+    M::AbstractManifold,
+    vgf::VGF,
+    p;
+    basis::B=get_basis(vgf.jacobian_type),
+    range::AbstractPowerRepresentation=get_range(vgf.jacobian_type),
+) where {
+    FT,
+    VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType},
+    B<:AbstractBasis,
+}
+    n = vgf.range_dimension
+    d = manifold_dimension(M)
+    gradients = vgf.jacobian!!(M, p)
+    mP = PowerManifold(M, range, vgf.range_dimension)
+    # generate the first row to get an eltype
+    c1 = get_coordinates(M, p, gradients[mP, 1], basis)
+    JF = zeros(eltype(c1), n, d)
+    JF[1, :] .= c1
+    for i in 2:n
+        JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis)
+    end
+    return JF
+end
+# (b) We have a vector of gradient functions
+function get_jacobian(
+    M::AbstractManifold, vgf::VGF, p; basis=get_basis(vgf.jacobian_type), kwargs...
+) where {
+    FT,
+    VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType},
+}
+    n = vgf.range_dimension
+    d = manifold_dimension(M)
+    # generate the first row to get an eltype
+    c1 = get_coordinates(M, p, vgf.jacobian!![1](M, p), basis)
+    JF = zeros(eltype(c1), n, d)
+    JF[1, :] .= c1
+    for i in 2:n
+        JF[i, :] .= get_coordinates(M, p, vgf.jacobian!![i](M, p), basis)
+    end
+    return JF
+end
+# (c) We have a Jacobian function
+function get_jacobian(
+    M::AbstractManifold, vgf::VGF, p; basis=get_basis(vgf.jacobian_type), kwargs...
+) where {
+    FT,
+    VGF<:AbstractVectorGradientFunction{
+        <:AllocatingEvaluation,FT,<:CoordinateVectorialType
+    },
+}
+    JF = vgf.jacobian!!(M, p)
+    # convert the rows from the stored basis into the requested one
+    _change_basis!(M, JF, p, vgf.jacobian_type.basis, basis)
+    return JF
+end
+
+# Part II: mutating vgf – allocating jacobian
+# (a) We have a single gradient function
+function get_jacobian(
+    M::AbstractManifold,
+    vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType},
+    p;
+    basis::B=DefaultOrthonormalBasis(),
+    range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type),
+) where {FT,B<:AbstractBasis}
+    n = vgf.range_dimension
+    d = manifold_dimension(M)
+    mP = PowerManifold(M, range, vgf.range_dimension)
+    gradients = zero_vector(mP, fill(p, mP))
+    vgf.jacobian!!(M, gradients, p)
+    # generate the first row to get an eltype
+    c1 = get_coordinates(M, p, gradients[mP, 1], basis)
+    JF = zeros(eltype(c1), n, d)
+    JF[1, :] .= c1
+    for i in 2:n
+        JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis)
+    end
+    return JF
+end
+# (b) We have a vector of gradient functions
+function get_jacobian(
+    M::AbstractManifold,
+    vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType},
+    p;
+    basis=get_basis(vgf.jacobian_type),
+) where {FT}
+    n = vgf.range_dimension
+    d = manifold_dimension(M)
+    # generate the first row to get an eltype
+    X = zero_vector(M, p)
+    vgf.jacobian!![1](M, X, p)
+    c1 = get_coordinates(M, p, X, basis)
+    JF = zeros(eltype(c1), n, d)
+    JF[1, :] .= c1
+    for i in 2:n
+        vgf.jacobian!![i](M, X, p)
+        JF[i, :] .= get_coordinates(M, p, X, basis)
+    end
+    return JF
+end
+# (c) We have a Jacobian function
+function get_jacobian(
+    M::AbstractManifold,
+    vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType},
+    p;
+    basis=get_basis(vgf.jacobian_type),
+) where {FT}
+    c = get_coordinates(M, p, zero_vector(M, p), basis)
+    JF = zeros(eltype(c), vgf.range_dimension, manifold_dimension(M))
+    vgf.jacobian!!(M, JF, p)
+    _change_basis!(M, JF, p, vgf.jacobian_type.basis, basis)
+    return JF
+end
+
+function get_jacobian! end
+@doc "$(_doc_get_jacobian_vgf)"
+get_jacobian!(M::AbstractManifold, JF, vgf::AbstractVectorGradientFunction, p)
+
+# Part I: allocating vgf – inplace jacobian
+# (a) We have a single gradient function
+function get_jacobian!(
+    M::AbstractManifold,
+    J,
+    vgf::VGF,
+    p;
+    basis::B=get_basis(vgf.jacobian_type),
+    range::AbstractPowerRepresentation=get_range(vgf.jacobian_type),
+) where {
+    FT,
+    VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType},
+    B<:AbstractBasis,
+}
+    n = vgf.range_dimension
+    gradients = vgf.jacobian!!(M, p)
+    mP = PowerManifold(M, range, vgf.range_dimension)
+    for i in 1:n
+        c = @view J[i, :]
+        get_coordinates!(M, c, p, gradients[mP, i], basis)
+    end
+    return J
+end
+# (b) We have a vector of gradient functions
+function get_jacobian!(
+    M::AbstractManifold, JF, vgf::VGF, p; basis=get_basis(vgf.jacobian_type)
+) where {
+    FT,
+    VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType},
+}
+    for i in 1:(vgf.range_dimension)
+        JF[i, :] .= get_coordinates(M, p, vgf.jacobian!![i](M, p), basis)
     end
+    return JF
 end
-function get_value(
+# (c) We have a Jacobian function
+function get_jacobian!(
+    M::AbstractManifold, JF, vgf::VGF, p; basis=get_basis(vgf.jacobian_type)
+) where {
+    FT,
+    VGF<:AbstractVectorGradientFunction{
+        <:AllocatingEvaluation,FT,<:CoordinateVectorialType
+    },
+}
+    JF .= vgf.jacobian!!(M, p)
+    _change_basis!(M, JF, p, vgf.jacobian_type.basis, basis)
+    return JF
+end
+
+# Part II: mutating vgf – inplace jacobian
+# (a) We have a single gradient function
+function get_jacobian!(
     M::AbstractManifold,
-    vgf::AbstractVectorFunction{E,<:ComponentVectorialType},
-    p,
-    i::Integer,
-) where {E<:AbstractEvaluationType}
-    return vgf.value!![i](M, p)
+    JF,
+    vgf::VGF,
+    p;
+    basis::B=DefaultOrthonormalBasis(),
+    range::AbstractPowerRepresentation=get_range(vgf.jacobian_type),
+) where {
+    FT,
+    VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType},
+    B<:AbstractBasis,
+}
+    mP = PowerManifold(M, range, vgf.range_dimension)
+    gradients = zero_vector(mP, fill(p, mP))
+    vgf.jacobian!!(M, gradients, p)
+    for i in 1:(vgf.range_dimension)
+        JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis)
+    end
+    return JF
 end
-function get_value(
-    M::AbstractManifold, vgf::AbstractVectorFunction{E,<:ComponentVectorialType}, p, i=:
-) where {E<:AbstractEvaluationType}
-    return [f(M, p) for f in vgf.value!![i]]
+# (b) We have a vector of gradient functions
+function get_jacobian!(
+    M::AbstractManifold,
+    JF,
+    vgf::VGF,
+    p;
+    basis=get_basis(vgf.jacobian_type),
+    X=zero_vector(M, p),
+) where {
+    FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}
+}
+    for i in 1:(vgf.range_dimension)
+        vgf.jacobian!![i](M, X, p)
+        JF[i, :] .= get_coordinates(M, p, X, basis)
+    end
+    return JF
+end
+# (c) We have a Jacobian function
+function get_jacobian!(
+    M::AbstractManifold, JF, vgf::VGF, p; basis=get_basis(vgf.jacobian_type)
+) where {
+    FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}
+}
+    vgf.jacobian!!(M, JF, p)
+    _change_basis!(M, JF, p, vgf.jacobian_type.basis, basis)
+    return JF
 end
-@doc raw"""
-    get_value_function(vgf::VectorGradientFunction, recursive=false)
-
-return the internally stored function computing [`get_value`](@ref).
-"""
-function get_value_function(vgf::VectorGradientFunction, recursive=false)
-    return vgf.value!!
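And a short usage sketch of the in-place variant, again with invented example data; it assumes the `VectorGradientFunction` constructor with a `CoordinateVectorialType` Jacobian as exercised in the tests further below.

```julia
using Manifolds, Manopt

M = Euclidean(2)
F(M, p) = [p[1] + p[2], p[1] * p[2]]
# CoordinateVectorialType: a single function returning the Jacobian as a matrix
J_F(M, p) = [1.0 1.0; p[2] p[1]]

vgf = VectorGradientFunction(
    F, J_F, 2; jacobian_type=CoordinateVectorialType(DefaultOrthonormalBasis())
)
p = [2.0, 3.0]
J = zeros(2, 2)
Manopt.get_jacobian!(M, J, vgf, p)  # fills J in place with [1.0 1.0; 3.0 2.0]
```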
+function get_jacobian_basis(vgf::AbstractVectorGradientFunction) + return _get_jacobian_basis(vgf.jacobian_type) end +_get_jacobian_basis(jt::AbstractVectorialType) = DefaultOrthonormalBasis() +_get_jacobian_basis(jt::CoordinateVectorialType) = jt.basis + +# +# +# ---- Gradient @doc raw""" get_gradient(M::AbstractManifold, vgf::VectorGradientFunction, p, i) get_gradient(M::AbstractManifold, vgf::VectorGradientFunction, p, i, range) @@ -334,18 +834,13 @@ get_gradient( range::Union{AbstractPowerRepresentation,Nothing}=nothing, ) -_vgf_index_to_length(b::BitVector, n) = sum(b) -_vgf_index_to_length(::Colon, n) = n -_vgf_index_to_length(i::AbstractArray{<:Integer}, n) = length(i) -_vgf_index_to_length(r::UnitRange{<:Integer}, n) = length(r) - # Generic case, allocate (a) a single tangent vector function get_gradient( M::AbstractManifold, vgf::AbstractVectorGradientFunction, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) X = zero_vector(M, p) return get_gradient!(M, X, vgf, p, i, range) @@ -356,12 +851,11 @@ function get_gradient( vgf::AbstractVectorGradientFunction, p, i=:, # as long as the length can be found it should work, see _vgf_index_to_length - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) - P = fill(p, pM) - X = zero_vector(pM, P) + X = zero_vector(pM, fill(p, pM)) return get_gradient!(M, X, vgf, p, i, range) end # (c) Special cases where allocations can be skipped @@ -397,7 +891,7 @@ function get_gradient!( }, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} JF = vgf.jacobian!!(M, p) get_vector!(M, X, p, JF[i, :], vgf.jacobian_type.basis) #convert rows to gradients @@ -411,7 +905,7 @@ function get_gradient!( }, p, i=:, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -439,7 +933,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -457,7 +951,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, p, i::Colon, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -474,7 +968,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) 
where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) mP = PowerManifold(M, range, n) @@ -487,7 +981,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} mP = PowerManifold(M, range, vgf.range_dimension) copyto!(M, X, p, vgf.jacobian!!(M, p)[mP, i]) @@ -503,12 +997,11 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} # a type wise safe way to allocate what usually should yield a n-times-d matrix pM = PowerManifold(M, range, vgf.range_dimension...) - P = fill(p, pM) - Y = zero_vector(pM, P) + Y = zero_vector(pM, fill(p, pM)) JF = reshape( get_coordinates(pM, fill(p, pM), Y, vgf.jacobian_type.basis), power_dimensions(pM)..., @@ -524,7 +1017,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} # a type wise safe way to allocate what usually should yield a n-times-d matrix pM = PowerManifold(M, range, vgf.range_dimension...) @@ -549,7 +1042,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=nothing, + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} return vgf.jacobian!![i](M, X, p) end @@ -559,7 +1052,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -578,7 +1071,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} pM = PowerManifold(M, range, vgf.range_dimension...) 
P = fill(p, pM)
@@ -593,7 +1086,7 @@ function get_gradient!(
     vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType},
     p,
     i,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
+    range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type),
 ) where {FT<:AbstractVectorialType}
     #Single access for function is a bit expensive
     n = _vgf_index_to_length(i, vgf.range_dimension)
@@ -611,212 +1104,93 @@ get_gradient_function(vgf::VectorGradientFunction, recursive=false) = vgf.jacobi
 
 #
 #
-# ---- Hessian
+# ---- Value
 @doc raw"""
-    get_hessian(M::AbstractManifold, vgf::VectorHessianFunction, p, X, i)
-    get_hessian(M::AbstractManifold, vgf::VectorHessianFunction, p, X, i, range)
-    get_hessian!(M::AbstractManifold, X, vgf::VectorHessianFunction, p, X, i)
-    get_hessian!(M::AbstractManifold, X, vgf::VectorHessianFunction, p, X, i, range)
+    get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p[, i=:])
+    get_value!(M::AbstractManifold, V, vgf::AbstractVectorFunction, p[, i=:])
 
-Evaluate the Hessians of the vector function `vgf` on the manifold `M` at `p` in direction `X`
-and the values given in `range`, specifying the representation of the gradients.
+Evaluate the vector function [`VectorGradientFunction`](@ref) `vgf` at `p`.
+A `range` is accepted for consistency with the other access functions, but is currently unused here.
+
+The index `i` can be any linear index; you can provide
 
-Since `i` is assumed to be a linear index, you can provide
 * a single integer
 * a `UnitRange` to specify a range to be returned like `1:3`
 * a `BitVector` specifying a selection
 * a `AbstractVector{<:Integer}` to specify indices
-* `:` to return the vector of all gradients
-"""
-get_hessian(
-    M::AbstractManifold,
-    vgf::VectorHessianFunction,
-    p,
-    X,
-    i,
-    range::Union{AbstractPowerRepresentation,Nothing}=nothing,
-)
-
-# Generic case, allocate (a) a single tangent vector
-function get_hessian(
-    M::AbstractManifold,
-    vhf::VectorHessianFunction,
-    p,
-    X,
-    i::Integer,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-)
-    Y = zero_vector(M, p)
-    return get_hessian!(M, Y, vhf, p, X, i, range)
-end
-# (b) UnitRange and AbstractVector allow to use length for BitVector its sum
-function get_hessian(
-    M::AbstractManifold,
-    vhf::VectorHessianFunction,
-    p,
-    X,
-    i=:, # as long as the length can be found it should work, see _vgf_index_to_length
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-)
-    n = _vgf_index_to_length(i, vhf.range_dimension)
-    pM = PowerManifold(M, range, n)
-    P = fill(p, pM)
-    Y = zero_vector(pM, P)
-    return get_hessian!(M, Y, vhf, p, X, i, range)
-end
+* `:` to return the vector of all values, which is also the default
 
-#
-#
-# Part I: allocation
-# I (a) a vector of functions
-function get_hessian!(
-    M::AbstractManifold,
-    Y,
-    vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType},
-    p,
-    X,
-    i::Integer,
-    ::Union{AbstractPowerRepresentation,Nothing}=nothing,
-) where {FT,JT}
-    return copyto!(M, Y, p, vhf.hessians!![i](M, p, X))
-end
-function get_hessian!(
-    M::AbstractManifold,
-    Y,
-    vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType},
-    p,
-    X,
-    i,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-) where {FT,JT}
-    n = _vgf_index_to_length(i, vhf.range_dimension)
-    pM = PowerManifold(M, range, n)
-    rep_size = representation_size(M)
-    # In the resulting `X` the indices are linear,
-    # in `jacobian[i]` the functions f are ordered in a linear sense
-    for (j, f) in zip(1:n, vhf.hessians!![i])
-        copyto!(M, _write(pM, rep_size, Y, (j,)), f(M, p, X))
-    end
-    return Y
-end
-function get_hessian!(
-    M::AbstractManifold,
-    Y,
-    vgf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType},
-    p,
-    X,
-    i::Colon,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-) where {FT,JT}
-    n = _vgf_index_to_length(i, vgf.range_dimension)
-    pM = PowerManifold(M, range, n)
-    rep_size = representation_size(M)
-    for (j, f) in enumerate(vgf.hessians!!)
-        copyto!(M, _write(pM, rep_size, Y, (j,)), p, f(M, p, X))
+The variant `get_value!` performs the evaluation in place of `V`.
+"""
+get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p, i)
+function get_value(
+    M::AbstractManifold, vgf::AbstractVectorFunction{E,<:FunctionVectorialType}, p, i=:
+) where {E<:AllocatingEvaluation}
+    c = vgf.value!!(M, p)
+    if isa(c, Number)
+        return c
+    else
+        return c[i]
     end
-    return Y
-end
-# Part I(c) A single gradient function
-function get_hessian!(
-    M::AbstractManifold,
-    Y,
-    vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:FunctionVectorialType},
-    p,
-    X,
-    i,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-) where {FT,JT}
-    n = _vgf_index_to_length(i, vhf.range_dimension)
-    mP = PowerManifold(M, range, n)
-    copyto!(mP, Y, vhf.hessians!!(M, p, X)[mP, i])
-    return Y
 end
-function get_hessian!(
+function get_value(
     M::AbstractManifold,
-    Y,
-    vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:FunctionVectorialType},
+    vgf::AbstractVectorFunction{E,<:ComponentVectorialType},
     p,
-    X,
     i::Integer,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-) where {FT,JT}
-    mP = PowerManifold(M, range, vhf.range_dimension)
-    copyto!(M, Y, p, vhf.hessians!!(M, p, X)[mP, i])
-    return Y
+) where {E<:AbstractEvaluationType}
+    return vgf.value!![i](M, p)
 end
-#
-#
-# Part II: in-place evaluations
-# (a) a vector of functions
-function get_hessian!(
-    M::AbstractManifold,
-    Y,
-    vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:ComponentVectorialType},
-    p,
-    X,
-    i::Integer,
-    ::Union{AbstractPowerRepresentation,Nothing}=nothing,
-) where {FT,JT}
-    return vhf.hessians!![i](M, Y, p, X)
+function get_value(
+    M::AbstractManifold, vgf::AbstractVectorFunction{E,<:ComponentVectorialType}, p, i=:
+) where {E<:AbstractEvaluationType}
+    return [f(M, p) for f in vgf.value!![i]]
 end
-function get_hessian!(
+function get_value(
     M::AbstractManifold,
-    Y,
-    vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:ComponentVectorialType},
+    vgf::AbstractVectorFunction{E,<:FunctionVectorialType},
     p,
-    X,
-    i,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-) where {FT,JT}
-    n = _vgf_index_to_length(i, vhf.range_dimension)
-    pM = PowerManifold(M, range, n)
-    rep_size = representation_size(M)
-    # In the resulting X the indices are linear,
-    # in jacobian[i] have the functions f are also given n a linear sense
-    for (j, f) in zip(1:n, vhf.hessians!![i])
-        f(M, _write(pM, rep_size, Y, (j,)), p, X)
-    end
-    return Y
+    i=:;
+    value_cache=zeros(vgf.range_dimension),
+) where {E<:InplaceEvaluation}
+    vgf.value!!(M, value_cache, p)
+    return value_cache[i]
 end
-# II(b) a single function
-function get_hessian!(
+
+# An in-place variant for the ComponentVectorialType does not make sense, since those would be real-valued functions
+
+function get_value!(
     M::AbstractManifold,
-    Y,
-    vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:FunctionVectorialType},
+    V,
+    vgf::AbstractVectorFunction{<:AllocatingEvaluation,<:FunctionVectorialType},
     p,
-    X,
-    i::Integer,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-) where {FT,JT}
-    pM = PowerManifold(M, range, vhf.range_dimension...)
-    P = fill(p, pM)
-    y = zero_vector(pM, P)
-    vhf.hessians!!(M, y, p, X)
-    copyto!(M, Y, p, y[pM, i])
-    return Y
+    i=:,
+)
+    c = vgf.value!!(M, p)
+    V .= c[i]
+    return V
 end
-function get_hessian!(
+
+function get_value!(
     M::AbstractManifold,
-    Y,
-    vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:FunctionVectorialType},
+    V,
+    vgf::AbstractVectorFunction{<:InplaceEvaluation,<:FunctionVectorialType},
     p,
-    X,
-    i,
-    range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(),
-) where {FT,JT}
-    #Single access for function is a bit expensive
-    n = _vgf_index_to_length(i, vhf.range_dimension)
-    pM_out = PowerManifold(M, range, n)
-    pM_temp = PowerManifold(M, range, vhf.range_dimension)
-    P = fill(p, pM_temp)
-    y = zero_vector(pM_temp, P)
-    vhf.hessians!!(M, y, p, X)
-    # Luckily all documented access functions work directly on `x[pM_temp,...]`
-    copyto!(pM_out, Y, P[pM_temp, i], y[pM_temp, i])
-    return Y
+    i=:;
+    value_cache=zeros(vgf.range_dimension),
+)
+    vgf.value!!(M, value_cache, p)
+    V .= value_cache[i]
+    return V
 end
 
-get_hessian_function(vgf::VectorHessianFunction, recursive::Bool=false) = vgf.hessians!!
+@doc raw"""
+    get_value_function(vgf::VectorGradientFunction, recursive=false)
+
+return the internally stored function computing [`get_value`](@ref).
+"""
+function get_value_function(vgf::VectorGradientFunction, recursive=false)
+    return vgf.value!!
+end
 
 @doc raw"""
     length(vgf::AbstractVectorFunction)
diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl
index a4af0cbfa4..e5e26c635e 100644
--- a/src/solvers/LevenbergMarquardt.jl
+++ b/src/solvers/LevenbergMarquardt.jl
@@ -1,36 +1,41 @@
-_doc_LM_formula = raw"""
-```math
-\operatorname*{arg\,min}_{p ∈ \mathcal M} \frac{1}{2} \lVert f(p) \rVert^2,
-```
-"""
 _doc_LM = """
-    LevenbergMarquardt(M, f, jacobian_f, p, num_components=-1)
+    LevenbergMarquardt(M, f, jacobian_f, p, num_components=-1; kwargs...)
+    LevenbergMarquardt(M, vgf, p; kwargs...)
+    LevenbergMarquardt(M, nlso, p; kwargs...)
     LevenbergMarquardt!(M, f, jacobian_f, p, num_components=-1; kwargs...)
+    LevenbergMarquardt!(M, vgf, p, num_components=-1; kwargs...)
+    LevenbergMarquardt!(M, nlso, p, num_components=-1; kwargs...)
 
-Solve an optimization problem of the form
+compute the Riemannian Levenberg-Marquardt algorithm [Peeters:1993, AdachiOkunoTakeda:2022](@cite)
+to solve
 
-$(_doc_LM_formula)
+$(_problem(:NonLinearLeastSquares))
 
-where ``f: $(_math(:M)) → ℝ^d`` is a continuously differentiable function,
-using the Riemannian Levenberg-Marquardt algorithm [Peeters:1993](@cite).
-The implementation follows Algorithm 1 [AdachiOkunoTakeda:2022](@cite).
-The second signature performs the optimization in-place of `p`.
+The second block of signatures performs the optimization in-place of `p`.
 
 # Input
 
 $(_var(:Argument, :M; type=true))
-* `f`: a cost function ``f: $(_math(:M))→ℝ^d``
-* `jacobian_f`: the Jacobian of ``f``. The Jacobian is supposed to accept a keyword argument
-  `basis_domain` which specifies basis of the tangent space at a given point in which the
-  Jacobian is to be calculated. By default it should be the `DefaultOrthonormalBasis`.
+* `f`: a cost function ``f: $(_math(:M))→ℝ^m``.
+  The cost function can be provided in two different ways
+    * as a single function returning a vector ``f(p) ∈ ℝ^m``
+    * as a vector of functions, where each single function returns a scalar ``f_i(p) ∈ ℝ``
+  The type is determined by the `function_type=` keyword argument.
+* `jacobian_f`: the Jacobian of ``f``.
+  The Jacobian can be provided in three different ways
+    * as a single function returning a vector of gradient vectors ``$(_tex(:bigl))($(_tex(:grad)) f_i(p)$(_tex(:bigr)))_{i=1}^m``
+    * as a vector of functions, where each single function returns a gradient vector ``$(_tex(:grad)) f_i(p)``, ``i=1,…,m``
+    * as a single function returning a (coefficient) matrix ``J ∈ ℝ^{m×d}``, where ``d`` is the dimension of the manifold.
+      These coefficients are given with respect to an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) of the tangent space at `p`.
+  The type is determined by the `jacobian_type=` keyword argument.
 $(_var(:Argument, :p))
-* `num_components`: length of the vector returned by the cost function (`d`).
+* `num_components`: length ``m`` of the vector returned by the cost function.
   By default its value is -1 which means that it is determined automatically by
-  calling `f` one additional time. This is only possible when `evaluation` is `AllocatingEvaluation`,
+  calling `f` one additional time. This is only possible when `evaluation` is [`AllocatingEvaluation`](@ref);
   for mutating evaluation this value must be explicitly specified.
 
-These can also be passed as a [`NonlinearLeastSquaresObjective`](@ref),
-then the keyword `jacobian_tangent_basis` below is ignored
+You can also provide the cost and its Jacobian already as a [`VectorGradientFunction`](@ref) `vgf`.
+Alternatively, you can pass a [`NonlinearLeastSquaresObjective`](@ref) `nlso` directly.
 
 # Keyword arguments
 
@@ -40,13 +45,13 @@ $(_var(:Keyword, :evaluation))
   residual (objective) at minimum is equal to 0.
 * `damping_term_min=0.1`: initial (and also minimal) value of the damping term
 * `β=5.0`: parameter by which the damping term is multiplied when the current new point is rejected
+* `function_type=`[`FunctionVectorialType`](@ref): an [`AbstractVectorialType`](@ref) specifying the type of cost function provided.
 * `initial_jacobian_f`: the initial Jacobian of the cost function `f`.
   By default this is a matrix of size `num_components` times the manifold dimension of similar type as `p`.
 * `initial_residual_values`: the initial residual vector of the cost function `f`.
   By default this is a vector of length `num_components` of similar type as `p`.
-* `jacobian_tangent_basis`: an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) specify the basis of the tangent space for `jacobian_f`.
+* `jacobian_type=`[`CoordinateVectorialType`](@ref)`(DefaultOrthonormalBasis())`: an [`AbstractVectorialType`](@ref) specifying the type of Jacobian provided.
$(_var(:Keyword, :retraction_method)) -$(_var(:Keyword, :stopping_criterion; default="[`StopAfterIteration`](@ref)`(200)`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-12)`")) $(_note(:OtherKeywords)) @@ -74,7 +79,8 @@ function LevenbergMarquardt( p, num_components::Int=-1; evaluation::AbstractEvaluationType=AllocatingEvaluation(), - jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis(), + function_type::AbstractVectorialType=FunctionVectorialType(), + jacobian_type::AbstractVectorialType=CoordinateVectorialType(DefaultOrthonormalBasis()), kwargs..., ) if num_components == -1 @@ -88,13 +94,24 @@ function LevenbergMarquardt( ) end end - nlso = NonlinearLeastSquaresObjective( + vgf = VectorGradientFunction( f, jacobian_f, num_components; evaluation=evaluation, - jacobian_tangent_basis=jacobian_tangent_basis, + function_type=function_type, + jacobian_type=jacobian_type, ) + return LevenbergMarquardt(M, vgf, p; evaluation=evaluation, kwargs...) +end +function LevenbergMarquardt( + M::AbstractManifold, + vgf::VectorGradientFunction, + p; + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + kwargs..., +) + nlso = NonlinearLeastSquaresObjective(vgf) return LevenbergMarquardt(M, nlso, p; evaluation=evaluation, kwargs...) end function LevenbergMarquardt( @@ -114,6 +131,8 @@ function LevenbergMarquardt!( num_components::Int=-1; evaluation::AbstractEvaluationType=AllocatingEvaluation(), jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis(), + jacobian_type=CoordinateVectorialType(jacobian_tangent_basis), + function_type=FunctionVectorialType(), kwargs..., ) if num_components == -1 @@ -132,7 +151,8 @@ function LevenbergMarquardt!( jacobian_f, num_components; evaluation=evaluation, - jacobian_tangent_basis=jacobian_tangent_basis, + jacobian_type=jacobian_type, + function_type=function_type, ) return LevenbergMarquardt!(M, nlso, p; evaluation=evaluation, kwargs...) end @@ -149,13 +169,12 @@ function LevenbergMarquardt!( β::Real=5.0, η::Real=0.2, damping_term_min::Real=0.1, - initial_residual_values=similar(p, get_objective(nlso).num_components), + initial_residual_values=similar(p, length(get_objective(nlso).objective)), initial_jacobian_f=similar( - p, get_objective(nlso).num_components, manifold_dimension(M) + p, length(get_objective(nlso).objective), manifold_dimension(M) ), kwargs..., #collect rest ) where {O<:Union{NonlinearLeastSquaresObjective,AbstractDecoratedManifoldObjective}} - i_nlso = get_objective(nlso) # un-decorate for safety dnlso = decorate_objective!(M, nlso; kwargs...) 
nlsp = DefaultManoptProblem(M, dnlso) lms = LevenbergMarquardtState( @@ -178,85 +197,35 @@ end # Solver functions # function initialize_solver!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - lms::LevenbergMarquardtState, -) where {mT<:AbstractManifold} - M = get_manifold(dmp) - lms.residual_values = get_objective(dmp).f(M, lms.p) - lms.X = get_gradient(dmp, lms.p) - return lms -end -function initialize_solver!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, + dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, lms::LevenbergMarquardtState, ) where {mT<:AbstractManifold} M = get_manifold(dmp) - get_objective(dmp).f(M, lms.residual_values, lms.p) - get_gradient_from_Jacobian!(M, lms.X, get_objective(dmp), lms.p, lms.jacF) - return lms -end - -function _maybe_get_basis(M::AbstractManifold, p, B::AbstractBasis) - if requires_caching(B) - return get_basis(M, p, B) - else - return B - end -end - -function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - jacF, - p, - basis_domain::AbstractBasis, -) where {mT} nlso = get_objective(dmp) - return copyto!(jacF, nlso.jacobian!!(get_manifold(dmp), p; basis_domain=basis_domain)) -end -function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, - jacF, - p, - basis_domain::AbstractBasis, -) where {mT} - nlso = get_objective(dmp) - return nlso.jacobian!!(get_manifold(dmp), jacF, p; basis_domain=basis_domain) -end - -function get_residuals!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - residuals, - p, -) where {mT} - return copyto!(residuals, get_objective(dmp).f(get_manifold(dmp), p)) -end -function get_residuals!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, - residuals, - p, -) where {mT} - return get_objective(dmp).f(get_manifold(dmp), residuals, p) + get_residuals!(M, lms.residual_values, nlso, lms.p) + get_jacobian!(M, lms.jacobian, nlso, lms.p) + get_gradient!(M, lms.X, nlso, lms.p; jacobian_cache=lms.jacobian) + return lms end function step_solver!( dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, lms::LevenbergMarquardtState, - k::Integer, + ::Integer, ) where {mT<:AbstractManifold} # `o.residual_values` is either initialized by `initialize_solver!` or taken from the previous iteration M = get_manifold(dmp) nlso = get_objective(dmp) - basis_ox = _maybe_get_basis(M, lms.p, nlso.jacobian_tangent_basis) # a new Jacobian is only needed if the last step was successful if lms.last_step_successful - get_jacobian!(dmp, lms.jacF, lms.p, basis_ox) + get_jacobian!(M, lms.jacobian, nlso, lms.p) end λk = lms.damping_term * norm(lms.residual_values)^2 - - JJ = transpose(lms.jacF) * lms.jacF + λk * I + basis_ox = get_basis(nlso.objective.jacobian_type) + JJ = transpose(lms.jacobian) * lms.jacobian + λk * I # `cholesky` is technically not necessary but it's the fastest method to solve the # problem because JJ is symmetric positive definite - grad_f_c = transpose(lms.jacF) * lms.residual_values + grad_f_c = transpose(lms.jacobian) * lms.residual_values sk = cholesky(JJ) \ -grad_f_c get_vector!(M, lms.X, lms.p, grad_f_c, basis_ox) @@ -265,11 +234,11 @@ function step_solver!( temp_x = retract(M, lms.p, lms.step_vector, lms.retraction_method) normFk2 = norm(lms.residual_values)^2 - get_residuals!(dmp, lms.candidate_residual_values, temp_x) + get_value!(M, 
lms.candidate_residual_values, nlso.objective, temp_x)
     ρk =
         (normFk2 - norm(lms.candidate_residual_values)^2) / (
-            -2 * inner(M, lms.p, lms.X, lms.step_vector) - norm(lms.jacF * sk)^2 -
+            -2 * inner(M, lms.p, lms.X, lms.step_vector) - norm(lms.jacobian * sk)^2 -
             λk * norm(sk)^2
         )
     if ρk >= lms.η
diff --git a/src/solvers/interior_point_Newton.jl b/src/solvers/interior_point_Newton.jl
index 112774d569..625bcbe3bd 100644
--- a/src/solvers/interior_point_Newton.jl
+++ b/src/solvers/interior_point_Newton.jl
@@ -58,7 +58,7 @@ $(_var(:Keyword, :evaluation))
 * `g=nothing`: the inequality constraints
 * `grad_g=nothing`: the gradient of the inequality constraints
 * `grad_h=nothing`: the gradient of the equality constraints
-* `gradient_range=nothing`: specify how gradients are represented, where `nothing` is equivalent to [`NestedPowerRepresentation`](@extref)
+* `gradient_range=nothing`: specify how gradients are represented, where `nothing` is equivalent to [`NestedPowerRepresentation`](@extref `ManifoldsBase.NestedPowerRepresentation`)
 * `gradient_equality_range=gradient_range`: specify how the gradients of the equality constraints are represented
 * `gradient_inequality_range=gradient_range`: specify how the gradients of the inequality constraints are represented
 * `h=nothing`: the equality constraints
diff --git a/test/plans/test_constrained_plan.jl b/test/plans/test_constrained_plan.jl
index a0fd3842b9..7ebc1362f6 100644
--- a/test/plans/test_constrained_plan.jl
+++ b/test/plans/test_constrained_plan.jl
@@ -12,6 +12,7 @@ include("../utils/dummy_types.jl")
     hess_f!(M, Y, p, X) = (Y .= [2.0, 2.0, 2.0])
     # Inequality constraints
     g(M, p) = [p[1] - 1, -p[2] - 1]
+    g!(M, V, p) = (V .= [p[1] - 1, -p[2] - 1])
     # # Function
     grad_g(M, p) = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]]
     grad_gA(M, p) = [1.0 0.0; 0.0 -1.0; 0.0 0.0]
@@ -41,6 +42,7 @@ include("../utils/dummy_types.jl")
     ) == 2
     # Equality Constraints
     h(M, p) = [2 * p[3] - 1]
+    h!(M, V, p) = (V .= [2 * p[3] - 1])
     h1(M, p) = 2 * p[3] - 1
     grad_h(M, p) = [[0.0, 0.0, 2.0]]
     grad_hA(M, p) = [[0.0, 0.0, 2.0];;]
@@ -83,9 +85,9 @@ include("../utils/dummy_types.jl")
     cofm = ConstrainedManifoldObjective(
         f,
         grad_f!,
-        g,
+        g!,
         grad_g!,
-        h,
+        h!,
         grad_h!;
         evaluation=InplaceEvaluation(),
         inequality_constraints=2,
@@ -149,9 +151,9 @@ include("../utils/dummy_types.jl")
     cofhm = ConstrainedManifoldObjective(
         f,
         grad_f!,
-        g,
+        g!,
         grad_g!,
-        h,
+        h!,
         grad_h!;
         hess_f=hess_f!,
         hess_g=hess_g!,
diff --git a/test/plans/test_nonlinear_least_squares_plan.jl b/test/plans/test_nonlinear_least_squares_plan.jl
new file mode 100644
index 0000000000..c54fc07c57
--- /dev/null
+++ b/test/plans/test_nonlinear_least_squares_plan.jl
@@ -0,0 +1,93 @@
+using Manifolds, Manopt, Test
+
+@testset "Nonlinear least squares plan" begin
+    @testset "Test cost/residual/jacobian cases" begin
+        # a simple nlso objective on R2
+        M = Euclidean(2)
+        d1 = [1, 0]
+        d2 = [0, 1]
+        f1(M, x) = norm(x - d1)
+        f2(M, x) = norm(x - d2)
+        f(M, x) = [f1(M, x), f2(M, x)]
+        # Components
+        f!(M, V, x) = (V .= [f1(M, x), f2(M, x)])
+        j1(M, x) = (x - d1) / norm(x - d1)
+        j1!(M, X, x) = (X .= (x - d1) / norm(x - d1))
+        j2(M, x) = (x - d2) / norm(x - d2)
+        j2!(M, X, x) = (X .= (x - d2) / norm(x - d2))
+        # Function
+        JF(M, x) = [j1(M, x), j2(M, x)]
+        JF!(M, JF, x) = (JF .= [j1(M, x), j2(M, x)])
+        # Jacobian matrix
+        J(M, x) = cat(j1(M, x), j2(M, x); dims=2)
+        J!(M, J, x) = (J .= cat(j1(M, x), j2(M, x); dims=2))
+        # Smoothing types
+
+        # Test all (new) possible combinations of vectorial cost and Jacobian
+        # (1) [F]unction
(Gradient), [C]omponent (Gradients), [J] Coordinate (Jacobian in Basis)
+        # (2) [a]llocating [i] inplace
+        nlsoFa = NonlinearLeastSquaresObjective(
+            f, JF, 2; jacobian_type=FunctionVectorialType()
+        )
+        nlsoFi = NonlinearLeastSquaresObjective(
+            f!,
+            JF!,
+            2;
+            evaluation=InplaceEvaluation(),
+            jacobian_type=FunctionVectorialType(),
+        )
+        nlsoCa = NonlinearLeastSquaresObjective(
+            [f1, f2],
+            [j1, j2],
+            2;
+            function_type=ComponentVectorialType(),
+            jacobian_type=ComponentVectorialType(),
+        )
+        nlsoCi = NonlinearLeastSquaresObjective(
+            [f1, f2],
+            [j1!, j2!],
+            2;
+            function_type=ComponentVectorialType(),
+            jacobian_type=ComponentVectorialType(),
+            evaluation=InplaceEvaluation(),
+        )
+        nlsoJa = NonlinearLeastSquaresObjective(
+            f, J, 2; jacobian_type=CoordinateVectorialType()
+        )
+        nlsoJi = NonlinearLeastSquaresObjective(f!, J!, 2; evaluation=InplaceEvaluation())
+
+        p = [0.5, 0.5]
+        V = [0.0, 0.0]
+        Vt = [1 / sqrt(2), 1 / sqrt(2)]
+        G = zeros(2, 2)
+        Gt = 1 / sqrt(2) .* [-1.0 1.0; 1.0 -1.0]
+        for nlso in [nlsoFa, nlsoFi, nlsoCa, nlsoCi, nlsoJa, nlsoJi]
+            c = get_cost(M, nlso, p)
+            @test c ≈ 0.5
+            fill!(V, 0.0)
+            get_residuals!(M, V, nlso, p)
+            @test V == get_residuals(M, nlso, p)
+            @test V ≈ Vt
+            @test 0.5 * sum(abs.(V) .^ 2) ≈ c
+            fill!(G, 0.0)
+            get_jacobian!(M, G, nlso, p)
+            @test G == get_jacobian(M, nlso, p)
+            @test G == Gt
+            # since no smoothing is applied, we can also always check against the allocating
+            # Jacobian of the objective
+            G2 = get_jacobian(M, nlso.objective, p)
+            @test G2 == Gt
+        end
+    end
+    @testset "Test Change of basis" begin
+        J = ones(2, 2)
+        Jt = ones(2, 2)
+        M = Euclidean(2)
+        p = [0.5, 0.5]
+        B1 = DefaultBasis()
+        B2 = DefaultOrthonormalBasis()
+        Manopt._change_basis!(M, J, p, B1, B2)
+        # In practice both yield the same coordinates, so J stays equal to Jt
+        @test J == Jt
+    end
+end
diff --git a/test/plans/test_vectorial_plan.jl b/test/plans/test_vectorial_plan.jl
index da8daf3668..c136dd2f70 100644
--- a/test/plans/test_vectorial_plan.jl
+++ b/test/plans/test_vectorial_plan.jl
@@ -3,6 +3,7 @@ using Manopt: get_value, get_value_function, get_gradient_function
 @testset "VectorialGradientCost" begin
     M = ManifoldsBase.DefaultManifold(3)
     g(M, p) = [p[1] - 1, -p[2] - 1]
+    g!(M, V, p) = (V .= [p[1] - 1, -p[2] - 1])
     # # Function
     grad_g(M, p) = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]]
     hess_g(M, p, X) = [copy(X), -copy(X)]
@@ -37,7 +38,7 @@ using Manopt: get_value, get_value_function, get_gradient_function
         function_type=ComponentVectorialType(),
         jacobian_type=ComponentVectorialType(),
     )
-    vgf_fi = VectorGradientFunction(g, grad_g!, 2; evaluation=InplaceEvaluation())
+    vgf_fi = VectorGradientFunction(g!, grad_g!, 2; evaluation=InplaceEvaluation())
     vgf_vi = VectorGradientFunction(
         [g1, g2],
         [grad_g1!, grad_g2!],
@@ -50,12 +51,24 @@ using Manopt: get_value, get_value_function, get_gradient_function
         g, jac_g, 2; jacobian_type=CoordinateVectorialType(DefaultOrthonormalBasis())
     )
     vgf_ji = VectorGradientFunction(
-        g,
+        g!,
         jac_g!,
         2;
        jacobian_type=CoordinateVectorialType(DefaultOrthonormalBasis()),
         evaluation=InplaceEvaluation(),
     )
+    @test Manopt.get_jacobian_basis(vgf_ji) == vgf_ji.jacobian_type.basis
+    @test Manopt.get_jacobian_basis(vgf_vi) == DefaultOrthonormalBasis()
+    vgf_jib = VectorGradientFunction(
+        g!,
+        jac_g!,
+        2;
+        jacobian_type=CoordinateVectorialType(DefaultBasis()),
+        evaluation=InplaceEvaluation(),
+    )
+    @test Manopt.get_jacobian_basis(vgf_ji) == vgf_ji.jacobian_type.basis
+    @test Manopt.get_jacobian_basis(vgf_jib) == DefaultBasis()
+    @test
Manopt.get_jacobian_basis(vgf_vi) == DefaultOrthonormalBasis() p = [1.0, 2.0, 3.0] c = [0.0, -3.0] gg = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]] @@ -71,7 +84,7 @@ using Manopt: get_value, get_value_function, get_gradient_function jacobian_type=ComponentVectorialType(), hessian_type=ComponentVectorialType(), ) - vhf_fi = VectorHessianFunction(g, grad_g!, hess_g!, 2; evaluation=InplaceEvaluation()) + vhf_fi = VectorHessianFunction(g!, grad_g!, hess_g!, 2; evaluation=InplaceEvaluation()) vhf_vi = VectorHessianFunction( [g1, g2], [grad_g1!, grad_g2!], diff --git a/test/runtests.jl b/test/runtests.jl index 3d90036bcb..80947d9fa1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,6 +16,7 @@ include("utils/example_tasks.jl") include("plans/test_conjugate_residual_plan.jl") include("plans/test_interior_point_newton_plan.jl") include("plans/test_nelder_mead_plan.jl") + include("plans/test_nonlinear_least_squares_plan.jl") include("plans/test_gradient_plan.jl") include("plans/test_constrained_plan.jl") include("plans/test_hessian_plan.jl")
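Finally, a hedged end-to-end sketch of the new `LevenbergMarquardt(M, vgf, p)` entry point added above; the two data points, the stacked linear residuals, and their constant Jacobian are made up for illustration.

```julia
using LinearAlgebra, Manifolds, Manopt

M = Euclidean(2)
d1, d2 = [1.0, 0.0], [0.0, 1.0]
# four residuals, x - d1 and x - d2 stacked, with a constant Jacobian matrix
f(M, x) = vcat(x - d1, x - d2)
J_f(M, x) = vcat(Matrix{Float64}(I, 2, 2), Matrix{Float64}(I, 2, 2))

vgf = VectorGradientFunction(
    f, J_f, 4; jacobian_type=CoordinateVectorialType(DefaultOrthonormalBasis())
)
q = LevenbergMarquardt(M, vgf, [2.0, 2.0])
# q ≈ (d1 + d2) / 2 == [0.5, 0.5], the least-squares mean of the two points
```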