
Commit 2841bbe

Add constraints with ForwardDiff support and support IPNewton (#47)
* Add constraints with ForwardDiff support and support IPNewton
* Fix CI failure and revert flux logging changes
* Use empty arrays as default bounds
* Add constraints kwargs to all AD backends to pass to constructor
* Add multiple constraints with IPNewton
* Add num_cons to all AD backend OptimizationFunction constructor
1 parent 85a7424 commit 2841bbe

File tree

4 files changed: +168 -48 lines

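In short, the commit lets an OptimizationFunction carry constraint functions (with ForwardDiff-generated derivatives) and routes Optim's IPNewton through them. Below is a minimal end-to-end sketch assembled from the test changes in test/rosenbrock.jl further down; the keyword names come from this diff, while the `rosenbrock` definition and the `using` line are assumptions for illustration.

```julia
# Minimal sketch of the constrained path added by this commit; illustrative, not canonical docs.
using GalacticOptim, Optim

rosenbrock(x, p) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2   # assumed objective, p unused
x0 = zeros(2)

# cons is the constraint function; its Jacobian/Hessian (cons_j, cons_h) are auto-generated via ForwardDiff.
optprob = OptimizationFunction(rosenbrock, x0, GalacticOptim.AutoForwardDiff();
                               cons = x -> x[1]^2 + x[2]^2, num_cons = 1)

# lcons/ucons are the new constraint bounds; IPNewton is Optim's interior-point Newton method.
prob = OptimizationProblem(optprob, x0, lcons = [-5.0], ucons = [10.0])
sol  = solve(prob, IPNewton())
```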

src/function.jl

Lines changed: 50 additions & 11 deletions
@@ -8,16 +8,21 @@ struct AutoZygote <: AbstractADType end
 struct AutoFiniteDiff <: AbstractADType end
 struct AutoModelingToolkit <: AbstractADType end

-struct OptimizationFunction{F,G,H,HV,K} <: AbstractOptimizationFunction
+struct OptimizationFunction{F,G,H,HV,C,CJ,CH,K} <: AbstractOptimizationFunction
     f::F
     grad::G
     hess::H
     hv::HV
     adtype::AbstractADType
+    cons::C
+    cons_j::CJ
+    cons_h::CH
+    num_cons::Int
     kwargs::K
 end

-function OptimizationFunction(f, x, ::AutoForwardDiff; grad=nothing,hess=nothing, p=DiffEqBase.NullParameters(), chunksize = 1, hv = nothing, kwargs...)
+function OptimizationFunction(f, x, ::AutoForwardDiff; grad=nothing, hess=nothing, cons = nothing, cons_j = nothing, cons_h = nothing,
+                              num_cons = 0, p=DiffEqBase.NullParameters(), chunksize = 1, hv = nothing, kwargs...)
     _f = θ -> f(θ,p)[1]
     if grad === nothing
         gradcfg = ForwardDiff.GradientConfig(_f, x, ForwardDiff.Chunk{chunksize}())
@@ -37,10 +42,41 @@ function OptimizationFunction(f, x, ::AutoForwardDiff; grad=nothing,hess=nothing
         end
     end

-    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(kwargs)}(f,grad,hess,hv,AutoForwardDiff(),kwargs)
+    if cons !== nothing && cons_j === nothing
+        if num_cons == 1
+            cjconfig = ForwardDiff.JacobianConfig(cons, x, ForwardDiff.Chunk{chunksize}())
+            cons_j = (res,θ) -> ForwardDiff.jacobian!(res, cons, θ, cjconfig)
+        else
+            cons_j = function (res, θ)
+                for i in 1:num_cons
+                    cjconfig = ForwardDiff.JacobianConfig(x -> cons(x)[i], θ, ForwardDiff.Chunk{chunksize}())
+                    ForwardDiff.jacobian!(res[i], x -> cons(x)[i], θ, cjconfig, Val{false}())
+                end
+            end
+        end
+    end
+
+    if cons !== nothing && cons_h === nothing
+        if num_cons == 1
+            cons_h = function (res, θ)
+                hess_config_cache = ForwardDiff.HessianConfig(cons, θ, ForwardDiff.Chunk{chunksize}())
+                ForwardDiff.hessian!(res, cons, θ, hess_config_cache)
+            end
+        else
+            cons_h = function (res, θ)
+                for i in 1:num_cons
+                    hess_config_cache = ForwardDiff.HessianConfig(x -> cons(x)[i], θ, ForwardDiff.Chunk{chunksize}())
+                    ForwardDiff.hessian!(res[i], x -> cons(x)[i], θ, hess_config_cache, Val{false}())
+                end
+            end
+        end
+    end
+
+    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(cons),typeof(cons_j),typeof(cons_h),typeof(kwargs)}(f,grad,hess,hv,AutoForwardDiff(),cons,cons_j,cons_h,num_cons,kwargs)
 end

-function OptimizationFunction(f, x, ::AutoZygote; grad=nothing, hess=nothing, p=DiffEqBase.NullParameters(), hv = nothing, kwargs...)
+function OptimizationFunction(f, x, ::AutoZygote; grad=nothing, hess=nothing, cons = nothing, cons_j = nothing, cons_h = nothing,
+                              num_cons = 0, p=DiffEqBase.NullParameters(), hv = nothing, kwargs...)
     _f = θ -> f(θ,p)[1]
     if grad === nothing
         grad = (res,θ) -> res isa DiffResults.DiffResult ? DiffResults.gradient!(res, Zygote.gradient(_f, θ)[1]) : res .= Zygote.gradient(_f, θ)[1]
@@ -68,10 +104,11 @@ function OptimizationFunction(f, x, ::AutoZygote; grad=nothing, hess=nothing, p=
             H .= getindex.(ForwardDiff.partials.(DiffResults.gradient(res)),1)
         end
     end
-    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(kwargs)}(f,grad,hess,hv,AutoZygote(),kwargs)
+    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(cons),typeof(cons_j),typeof(cons_h),typeof(kwargs)}(f,grad,hess,hv,AutoZygote(),cons,cons_j,cons_h,num_cons,kwargs)
 end

-function OptimizationFunction(f, x, ::AutoReverseDiff; grad=nothing,hess=nothing, p=DiffEqBase.NullParameters(), hv = nothing, kwargs...)
+function OptimizationFunction(f, x, ::AutoReverseDiff; grad=nothing,hess=nothing, cons = nothing, cons_j = nothing, cons_h = nothing,
+                              num_cons = 0, p=DiffEqBase.NullParameters(), hv = nothing, kwargs...)
     _f = θ -> f(θ,p)[1]
     if grad === nothing
         grad = (res,θ) -> ReverseDiff.gradient!(res, _f, θ, ReverseDiff.GradientConfig(θ))
@@ -100,11 +137,12 @@ function OptimizationFunction(f, x, ::AutoReverseDiff; grad=nothing,hess=nothing
         end
     end

-    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(kwargs)}(f,grad,hess,hv,AutoReverseDiff(),kwargs)
+    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(cons),typeof(cons_j),typeof(cons_h),typeof(kwargs)}(f,grad,hess,hv,AutoReverseDiff(),cons,cons_j,cons_h,num_cons,kwargs)
 end


-function OptimizationFunction(f, x, ::AutoTracker; grad=nothing,hess=nothing, p=DiffEqBase.NullParameters(), hv = nothing, kwargs...)
+function OptimizationFunction(f, x, ::AutoTracker; grad=nothing,hess=nothing, cons = nothing, cons_j = nothing, cons_h = nothing,
+                              num_cons = 0, p=DiffEqBase.NullParameters(), hv = nothing, kwargs...)
     _f = θ -> f(θ,p)[1]
     if grad === nothing
         grad = (res,θ) -> res isa DiffResults.DiffResult ? DiffResults.gradient!(res, Tracker.data(Tracker.gradient(_f, θ)[1])) : res .= Tracker.data(Tracker.gradient(_f, θ)[1])
@@ -119,10 +157,11 @@ function OptimizationFunction(f, x, ::AutoTracker; grad=nothing,hess=nothing, p=
     end


-    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(kwargs)}(f,grad,hess,hv,AutoTracker(),kwargs)
+    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(cons),typeof(cons_j),typeof(cons_h),typeof(kwargs)}(f,grad,hess,hv,AutoTracker(),cons,cons_j,cons_h,num_cons,kwargs)
 end

-function OptimizationFunction(f, x, adtype::AutoFiniteDiff; grad=nothing,hess=nothing, p=DiffEqBase.NullParameters(), hv = nothing, fdtype = :forward, fdhtype = :hcentral, kwargs...)
+function OptimizationFunction(f, x, adtype::AutoFiniteDiff; grad=nothing,hess=nothing, cons = nothing, cons_j = nothing, cons_h = nothing,
+                              num_cons = 0, p=DiffEqBase.NullParameters(), hv = nothing, fdtype = :forward, fdhtype = :hcentral, kwargs...)
     _f = θ -> f(θ,p)[1]
     if grad === nothing
         grad = (res,θ) -> FiniteDiff.finite_difference_gradient!(res, _f, θ, FiniteDiff.GradientCache(res, x, Val{fdtype}))
@@ -140,5 +179,5 @@ function OptimizationFunction(f, x, adtype::AutoFiniteDiff; grad=nothing,hess=no
         end
     end

-    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(kwargs)}(f,grad,hess,hv,adtype,kwargs)
+    return OptimizationFunction{typeof(f),typeof(grad),typeof(hess),typeof(hv),typeof(cons),typeof(cons_j),typeof(cons_h),typeof(kwargs)}(f,grad,hess,hv,adtype,cons,cons_j,cons_h,num_cons,kwargs)
 end
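For orientation, here is what the auto-generated `cons_j` and `cons_h` closures compute for a single constraint. This is a standalone ForwardDiff sketch, not the package's code path; the constraint and evaluation point are made up for illustration.

```julia
# Standalone ForwardDiff sketch of the quantities the AutoForwardDiff constructor
# wires into cons_j (constraint Jacobian) and cons_h (constraint Hessian).
using ForwardDiff

cons(θ) = θ[1]^2 + θ[2]^2          # one scalar constraint, as in the tests below
θ = [1.0, 2.0]

J = ForwardDiff.gradient(cons, θ)   # row of the constraint Jacobian: [2θ₁, 2θ₂] = [2.0, 4.0]
H = ForwardDiff.hessian(cons, θ)    # constraint Hessian: 2I
```

For `num_cons > 1`, the generated closures differentiate each component `x -> cons(x)[i]` separately and write into `res[i]`, so callers supply one result buffer per constraint.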

src/problem.jl

Lines changed: 5 additions & 3 deletions
@@ -1,13 +1,15 @@
 abstract type AbstractOptimizationProblem end

-struct OptimizationProblem{F,X,P,B,K} <: AbstractOptimizationProblem
+struct OptimizationProblem{F,X,P,B,LC,UC,K} <: AbstractOptimizationProblem
     f::F
     x::X
     p::P
     lb::B
     ub::B
+    lcons::LC
+    ucons::UC
     kwargs::K
-    function OptimizationProblem(f, x; p=DiffEqBase.NullParameters(), lb = nothing, ub = nothing, kwargs...)
-        new{typeof(f), typeof(x), typeof(p), typeof(lb), typeof(kwargs)}(f, x, p, lb, ub, kwargs)
+    function OptimizationProblem(f, x; p=DiffEqBase.NullParameters(), lb = [], ub = [], lcons = [], ucons = [], kwargs...)
+        new{typeof(f), typeof(x), typeof(p), typeof(lb), typeof(lcons), typeof(ucons), typeof(kwargs)}(f, x, p, lb, ub, lcons, ucons, kwargs)
     end
 end
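A small sketch of the patched constructor, assuming GalacticOptim is loaded; it shows the new `lcons`/`ucons` fields and the empty-array defaults for bounds. The `rosenbrock` definition is illustrative.

```julia
# Sketch of the updated OptimizationProblem constructor (assumes `using GalacticOptim`).
rosenbrock(x, p) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
prob = OptimizationProblem(rosenbrock, zeros(2), lcons = [-5.0], ucons = [10.0])

prob.lb, prob.ub        # ([], []) — unset bounds are now empty arrays instead of `nothing`
prob.lcons, prob.ucons  # ([-5.0], [10.0]) — constraint bounds stored for constrained solvers
```

The switch from `nothing` to empty arrays is what lets downstream code test `length(prob.lb) > 0`, as the NLopt change in src/solve.jl below does.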

src/solve.jl

Lines changed: 89 additions & 32 deletions
@@ -26,38 +26,35 @@ function update!(opt, xs::Flux.Zygote.Params, gs)
     end
 end

-maybe_with_logger(f, logger) = logger === nothing ? f() : Logging.with_logger(f, logger)
-
-function default_logger(logger)
-    Logging.min_enabled_level(logger) ≤ ProgressLogging.ProgressLevel && return nothing
-
-    if Sys.iswindows() || (isdefined(Main, :IJulia) && Main.IJulia.inited)
-        progresslogger = ConsoleProgressMonitor.ProgressLogger()
-    else
-        progresslogger = TerminalLoggers.TerminalLogger()
-    end
-
-    logger1 = LoggingExtras.EarlyFilteredLogger(progresslogger) do log
-        log.level == ProgressLogging.ProgressLevel
-    end
-    logger2 = LoggingExtras.EarlyFilteredLogger(logger) do log
-        log.level != ProgressLogging.ProgressLevel
-    end
-
-    LoggingExtras.TeeLogger(logger1, logger2)
+maybe_with_logger(f, logger) = logger === nothing ? f() : Logging.with_logger(f, logger)
+
+function default_logger(logger)
+    Logging.min_enabled_level(logger) ≤ ProgressLogging.ProgressLevel && return nothing
+    if Sys.iswindows() || (isdefined(Main, :IJulia) && Main.IJulia.inited)
+        progresslogger = ConsoleProgressMonitor.ProgressLogger()
+    else
+        progresslogger = TerminalLoggers.TerminalLogger()
+    end
+    logger1 = LoggingExtras.EarlyFilteredLogger(progresslogger) do log
+        log.level == ProgressLogging.ProgressLevel
+    end
+    logger2 = LoggingExtras.EarlyFilteredLogger(logger) do log
+        log.level != ProgressLogging.ProgressLevel
+    end
+    LoggingExtras.TeeLogger(logger1, logger2)
 end

 macro withprogress(progress, exprs...)
-    quote
-        if $progress
-            $maybe_with_logger($default_logger($Logging.current_logger())) do
-                $ProgressLogging.@withprogress $(exprs...)
-            end
-        else
-            $(exprs[end])
-        end
-    end |> esc
-end
+    quote
+        if $progress
+            $maybe_with_logger($default_logger($Logging.current_logger())) do
+                $ProgressLogging.@withprogress $(exprs...)
+            end
+        else
+            $(exprs[end])
+        end
+    end |> esc
+end

 function __solve(prob::OptimizationProblem, opt;cb = (args...) -> (false), maxiters = 1000, progress = true, save_best = true, kwargs...)

@@ -224,6 +221,66 @@ function __solve(prob::OptimizationProblem, opt::Union{Optim.Fminbox,Optim.SAMIN
     Optim.optimize(optim_f, prob.lb, prob.ub, prob.x, opt, Optim.Options(;extended_trace = true, callback = _cb, iterations = maxiters, kwargs...))
 end

+
+function __solve(prob::OptimizationProblem, opt::Optim.ConstrainedOptimizer;cb = (args...) -> (false), maxiters = 1000, kwargs...)
+    local x
+
+    function _cb(trace)
+        cb_call = cb(decompose_trace(trace).metadata["x"],x...)
+        if !(typeof(cb_call) <: Bool)
+            error("The callback should return a boolean `halt` for whether to stop the optimization process.")
+        end
+        cb_call
+    end
+
+    if prob.f isa OptimizationFunction
+        _loss = function(θ)
+            x = prob.f.f(θ, prob.p)
+            return x[1]
+        end
+        fg! = function (G,θ)
+            if G !== nothing
+                prob.f.grad(G, θ)
+            end
+            return _loss(θ)
+        end
+        optim_f = TwiceDifferentiable(_loss, prob.f.grad, fg!, prob.f.hess, prob.x)
+
+        cons! = (res, θ) -> res .= prob.f.cons(θ);
+
+        cons_j! = function(J, x)
+            if prob.f.num_cons > 1
+                res = [zeros(1,size(J,2)) for i in 1:size(J,1)]
+                prob.f.cons_j(res, x)
+                J = vcat(res...)
+            else
+                prob.f.cons_j(J, x)
+            end
+        end
+
+        cons_hl! = function (h, θ, λ)
+            if prob.f.num_cons > 1
+                res = [similar(h) for i in 1:length(λ)]
+                prob.f.cons_h(res, θ)
+                h .= zeros(size(h))
+                for i in 1:length(λ)
+                    h += λ[i]*res[i]
+                end
+            else
+                prob.f.cons_h(h, θ)
+                h += λ[1]*h
+            end
+
+        end
+        optim_fc = TwiceDifferentiableConstraints(cons!, cons_j!, cons_hl!, prob.lb, prob.ub, prob.lcons, prob.ucons)
+    else
+        error("Use OptimizationFunction to pass the derivatives or automatically generate them with one of the autodiff backends")
+    end
+
+    Optim.optimize(optim_f, optim_fc, prob.x, opt, Optim.Options(;extended_trace = true, callback = _cb, iterations = maxiters, kwargs...))
+end
+
+
 function __init__()
     @require BlackBoxOptim="a134a8b2-14d6-55f6-9291-3336d3ab0209" begin
         decompose_trace(opt::BlackBoxOptim.OptRunController) = BlackBoxOptim.best_candidate(opt)
@@ -318,10 +375,10 @@ function __init__()
         NLopt.min_objective!(opt, _loss)
     end

-    if prob.ub !== nothing
-        NLopt.upper_bounds!(opt, prob.ub)
+    if length(prob.ub) > 0
+        NLopt.upper_bounds!(opt, prob.ub)
     end
-    if prob.lb !== nothing
+    if length(prob.lb) > 0
         NLopt.lower_bounds!(opt, prob.lb)
     end
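For context on what the new `ConstrainedOptimizer` method assembles, here is the corresponding raw Optim.jl usage with hand-written constraint derivatives. The objective, the constraint, its hard-coded derivatives, and the names (`con_c!`, `con_j!`, `con_h!`) are illustrative only; in the package these callables are generated from `prob.f.cons`, `cons_j`, and `cons_h` rather than written by hand.

```julia
# Hedged sketch of the Optim.jl objects built by the new IPNewton path:
# a TwiceDifferentiable objective plus TwiceDifferentiableConstraints.
using Optim, ForwardDiff

rosen(x) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
g!(G, x) = ForwardDiff.gradient!(G, rosen, x)
h!(H, x) = ForwardDiff.hessian!(H, rosen, x)

con_c!(c, x) = (c[1] = x[1]^2 + x[2]^2; c)                  # single constraint c(x)
con_j!(J, x) = (J[1, 1] = 2x[1]; J[1, 2] = 2x[2]; J)         # its 1×2 Jacobian
con_h!(H, x, λ) = (H[1, 1] += 2λ[1]; H[2, 2] += 2λ[1]; H)    # add λ-weighted constraint Hessian

df  = TwiceDifferentiable(rosen, g!, h!, zeros(2))
dfc = TwiceDifferentiableConstraints(con_c!, con_j!, con_h!,
                                     Float64[], Float64[],   # no variable bounds
                                     [-5.0], [10.0])         # -5 ≤ c(x) ≤ 10
res = Optim.optimize(df, dfc, zeros(2), IPNewton())
```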

test/rosenbrock.jl

Lines changed: 24 additions & 2 deletions
@@ -24,8 +24,7 @@ prob = OptimizationProblem(rosenbrock, x0)
 sol = solve(prob, NelderMead())
 @test 10*sol.minimum < l1

-
-optprob = OptimizationFunction(rosenbrock, x0, GalacticOptim.AutoZygote())
+optprob = OptimizationFunction(rosenbrock, x0, GalacticOptim.AutoForwardDiff();cons= x -> x[1]^2 + x[2]^2, num_cons = 1)

 prob = OptimizationProblem(optprob, x0)
 sol = solve(prob, BFGS())
@@ -37,6 +36,29 @@ sol = solve(prob, Newton())
 sol = solve(prob, Optim.KrylovTrustRegion())
 @test 10*sol.minimum < l1

+prob = OptimizationProblem(optprob, x0, lcons = [-Inf], ucons = [Inf])
+sol = solve(prob, IPNewton())
+@test 10*sol.minimum < l1
+
+prob = OptimizationProblem(optprob, x0, lcons = [-5.0], ucons = [10.0])
+sol = solve(prob, IPNewton())
+@test 10*sol.minimum < l1
+
+prob = OptimizationProblem(optprob, x0, lcons = [0.0], ucons = [0.0], lb = [-500.0,-500.0], ub=[-50.0,-50.0])
+sol = solve(prob, IPNewton())
+@test sol.minimum < l1
+
+function con2_c(x)
+    [x[1]^2 + x[2]^2, x[2]*sin(x[1])-x[1]]
+end
+
+optprob = OptimizationFunction(rosenbrock, x0, GalacticOptim.AutoForwardDiff();cons= con2_c, num_cons = 2)
+prob = OptimizationProblem(optprob, x0, lcons = [-Inf,-Inf], ucons = [Inf,Inf])
+sol = solve(prob, IPNewton())
+@test 10*sol.minimum < l1
+
+optprob = OptimizationFunction(rosenbrock, x0, GalacticOptim.AutoZygote())
+prob = OptimizationProblem(optprob, x0)
 sol = solve(prob, ADAM(), progress = false)
 @test 10*sol.minimum < l1
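One usage note on the new IPNewton path, as a hedged sketch rather than documented API: the `cb` keyword is forwarded to the `_cb` trace wrapper shown in src/solve.jl above, receives the current iterate plus the objective's return value, and must return a Bool halt flag. The argument layout and forwarding through `solve` are assumed from that code.

```julia
# Hedged sketch: a callback for the IPNewton path, reusing the constrained
# AutoForwardDiff `optprob` defined earlier in these tests.
cb = (θ, args...) -> begin
    @info "IPNewton iterate" θ
    false                          # keep iterating; return true to halt
end

prob = OptimizationProblem(optprob, x0, lcons = [-5.0], ucons = [10.0])
sol  = solve(prob, IPNewton(), cb = cb, maxiters = 200)
```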
