fixed type instability in new LBFGS, re-enabled constructors

nantonel · nantonel · commit 2997ed3e3078 · 2017-11-26T17:53:34.000+01:00
diff --git a/src/AbstractOperators.jl b/src/AbstractOperators.jl
@@ -16,7 +16,7 @@ export LinearOperator,
 # Block stuff
 
 include("utilities/block.jl")
-include("utilities/deep.jl") # TODO: remove this eventually
+#include("utilities/deep.jl") # TODO: remove this eventually
 
 # Predicates and properties
 
diff --git a/src/linearoperators/LBFGS.jl b/src/linearoperators/LBFGS.jl
@@ -18,9 +18,9 @@ julia> d = L*grad; # compute new direction
 ```
 """
 
-mutable struct LBFGS{R, T <: BlockArray, M} <: LinearOperator
-	currmem::Integer
-	curridx::Integer
+mutable struct LBFGS{R, T <: BlockArray, M, I <: Integer} <: LinearOperator
+	currmem::I
+	curridx::I
 	s::T
 	y::T
 	s_M::Array{T, 1}
@@ -30,14 +30,32 @@ mutable struct LBFGS{R, T <: BlockArray, M} <: LinearOperator
 	H::R
 end
 
-function LBFGS(x::T, M::Integer) where {R, T <: BlockArray{R}}
+function LBFGS(x::T, M::I) where {T <: BlockArray, I <: Integer}
 	s_M = [blockzeros(x) for i = 1:M]
 	y_M = [blockzeros(x) for i = 1:M]
 	s = blockzeros(x)
 	y = blockzeros(x)
 	ys_M = zeros(M)
 	alphas = zeros(M)
-	LBFGS{R, T, M}(0, 0, s, y, s_M, y_M, ys_M, alphas, one(R))
+	R = real(eltype(x[1])) 
+	LBFGS{R, T, M, I}(0, 0, s, y, s_M, y_M, ys_M, alphas, one(R))
+end
+
+function LBFGS(domainType::D, dim::T, M::I) where {D <: Type , 
+						   T <: Tuple,  I <: Integer}
+	x = blockzeros(domainType, dim)
+	return LBFGS(x,M)
+end
+
+function LBFGS(domainType::D, dim::T, M::I) where {N, D <: NTuple{N,Type}, 
+						   T <: NTuple{N,Tuple},  I <: Integer}
+	x = blockzeros(domainType, dim)
+	return LBFGS(x,M)
+end
+
+function LBFGS(dim::T, M::I) where {T <: Tuple,  I <: Integer}
+	x = blockzeros(dim)
+	return LBFGS(x,M)
 end
 
 """
@@ -46,7 +64,7 @@ end
 See the documentation for `LBFGS`.
 """
 
-function update!(L::LBFGS{R, T, M}, x::T, x_prev::T, gradx::T, gradx_prev::T) where {R, T, M}
+function update!(L::LBFGS{R, T, M, I}, x::T, x_prev::T, gradx::T, gradx_prev::T) where {R, T, M, I}
 	L.s .= x .- x_prev
 	L.y .= gradx .- gradx_prev
 	ys = real(blockvecdot(L.s, L.y))
@@ -58,49 +76,49 @@ function update!(L::LBFGS{R, T, M}, x::T, x_prev::T, gradx::T, gradx_prev::T) wh
 		L.ys_M[L.curridx] = ys
 		blockcopy!(L.s_M[L.curridx], L.s)
 		blockcopy!(L.y_M[L.curridx], L.y)
-		yty = real(vecdot(L.y, L.y))
+		yty = real(blockvecdot(L.y, L.y))
 		L.H = ys/yty
 	end
 	return L
 end
 
 # LBFGS operators are symmetric
 
-Ac_mul_B!(x::T, L::LBFGS{R, T, M}, y::T) where {R, T, M} = A_mul_B!(x, L, y)
+Ac_mul_B!(x::T, L::LBFGS{R, T, M, I}, y::T) where {R, T, M, I} = A_mul_B!(x, L, y)
 
 # Two-loop recursion
 
-function A_mul_B!(d::T, L::LBFGS{R, T, M}, gradx::T) where {R, T, M}
+function A_mul_B!(d::T, L::LBFGS{R, T, M, I}, gradx::T) where {R, T, M, I}
 	d .= gradx
 	idx = loop1!(d,L)
 	d .= (*).(L.H, d)
 	d = loop2!(d,idx,L)
 end
 
-function loop1!(d::T, L::LBFGS{R, T, M}) where {R, T, M}
+function loop1!(d::T, L::LBFGS{R, T, M, I}) where {R, T, M, I}
 	idx = L.curridx
 	for i = 1:L.currmem
-		L.alphas[idx] = real(vecdot(L.s_M[idx], d))/L.ys_M[idx]
+		L.alphas[idx] = real(blockvecdot(L.s_M[idx], d))/L.ys_M[idx]
 		d .-= L.alphas[idx] .* L.y_M[idx]
 		idx -= 1
 		if idx == 0 idx = M end
 	end
 	return idx
 end
 
-function loop2!(d::T, idx::Int, L::LBFGS{R, T, M}) where {R, T, M}
+function loop2!(d::T, idx::Int, L::LBFGS{R, T, M, I}) where {R, T, M, I}
 	for i = 1:L.currmem
 		idx += 1
 		if idx > M idx = 1 end
-		beta = real(vecdot(L.y_M[idx], d))/L.ys_M[idx]
+		beta = real(blockvecdot(L.y_M[idx], d))/L.ys_M[idx]
 		d .+= (L.alphas[idx] - beta) .* L.s_M[idx]
 	end
 	return d
 end
 
 # Properties
-  domainType(L::LBFGS{R, T, M}) where {R, T, M} = T
-codomainType(L::LBFGS{R, T, M}) where {R, T, M} = T
+domainType(L::LBFGS{R, T, M}) where {R, T, M} = blockeltype(L.y_M[1])
+codomainType(L::LBFGS{R, T, M}) where {R, T, M} = blockeltype(L.y_M[1])
 
 size(A::LBFGS) = (blocksize(A.s), blocksize(A.s))
 
diff --git a/src/utilities/block.jl b/src/utilities/block.jl
@@ -1,26 +1,32 @@
-export RealOrComplex, BlockArray
-export blocksize,
-       blocklength,
-       blockvecnorm,
-       blockmaxabs,
-       blocksimilar,
-       blockcopy,
-       blockcopy!,
-       blockset!,
-       blockvecdot,
-       blockzeros,
-       blockaxpy!
+# TODO: decide whether the block-array utilities belong in the public API; the exports below are disabled for now.
+#export RealOrComplex, BlockArray
+#export blocksize,
+#       blockeltype,
+#       blocklength,
+#       blockvecnorm,
+#       blockmaxabs,
+#       blocksimilar,
+#       blockcopy,
+#       blockcopy!,
+#       blockset!,
+#       blockvecdot,
+#       blockzeros,
+#       blockaxpy!
 
 # Define block-arrays
 
 const RealOrComplex{R} = Union{R, Complex{R}}
-const BlockArray{R} = Union{AbstractArray{C, N} where {C <: RealOrComplex{R}, N}, Tuple{Vararg{AbstractArray{C, N} where {C <: RealOrComplex{R}, N}}}}
+const BlockArray{R} = Union{AbstractArray{C, N} where {C <: RealOrComplex{R}, N}, 
+			    Tuple{Vararg{AbstractArray{C, N} where {C <: RealOrComplex{R}, N}}}}
 
 # Operations on block-arrays
 
 blocksize(x::Tuple) = blocksize.(x)
 blocksize(x::AbstractArray) = size(x)
 
+blockeltype(x::Tuple) = blockeltype.(x)
+blockeltype(x::AbstractArray) = eltype(x)
+
 blocklength(x::Tuple) = sum(blocklength.(x))
 blocklength(x::AbstractArray) = length(x)
 
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -9,9 +9,9 @@ verb = true
 
 @testset "AbstractOperators" begin
 
-@testset "Tuple operations" begin
-  include("test_deep.jl")
-end
+#@testset "Tuple operations" begin
+#  include("test_deep.jl")
+#end
 
 @testset "Linear operators" begin
   include("test_linear_operators.jl")
@@ -26,12 +26,12 @@ end
   include("test_nonlinear_operators_calculus.jl")
 end
 
-@testset "Syntax shorthands" begin
-  include("test_syntax.jl")
-end
-
 @testset "L-BFGS" begin
   include("test_lbfgs.jl")
 end
 
+@testset "Syntax shorthands" begin
+  include("test_syntax.jl")
+end
+
 end
diff --git a/test/test_lbfgs.jl b/test/test_lbfgs.jl
@@ -1,4 +1,6 @@
-# @printf("\nTesting L-BFGS routines\n")
+@printf("\nTesting L-BFGS routines\n")
+
+function test_lbfgs()
 
 Q = [32.0000 13.1000 -4.9000 -3.0000  6.0000  2.2000  2.6000  3.4000 -1.9000 -7.5000;
  13.1000 18.3000 -5.3000 -9.5000  3.0000  2.1000  3.9000  3.0000 -3.6000 -4.4000;
@@ -60,14 +62,41 @@ for i = 1:5
 
     dir_ref = dirs_ref[:,i]
 
-	@time A_mul_B!(dir, H, -grad)
+    gradm = -grad
+    @time A_mul_B!(dir, H, gradm)
     @test vecnorm(dir-dir_ref, Inf)/(1+vecnorm(dir_ref, Inf)) <= 1e-15
 
-    @time A_mul_B!(dirdir, HH, (-grad, -grad))
+    gradm2 = (-grad,-grad)
+    @time A_mul_B!(dirdir, HH, gradm2)
     @test vecnorm(dirdir[1]-dir_ref, Inf)/(1+vecnorm(dir_ref, Inf)) <= 1e-15
     @test vecnorm(dirdir[2]-dir_ref, Inf)/(1+vecnorm(dir_ref, Inf)) <= 1e-15
 
     x_old = x;
     grad_old = grad;
 
 end
+
+end
+
+test_lbfgs()
+
+# Test the other LBFGS constructors: from a block array, from dimensions only, and from element types plus dimensions.
+mem = 3
+x = (zeros(10),zeros(Complex{Float64},10))
+H = LBFGS(x, mem)
+println(H)
+
+dim = (10,)
+H = LBFGS(dim, mem)
+println(H)
+
+dim = ((10,),(10,))
+H = LBFGS(dim, mem)
+println(H)
+
+D = (Float64,Complex{Float64})
+dim = ((10,),(10,))
+H = LBFGS(D,dim, mem)
+println(H)
+
+
diff --git a/test/test_linear_operators_calculus.jl b/test/test_linear_operators_calculus.jl
@@ -650,7 +650,7 @@ y = test_op(op, x, y0, verb)
 p = randperm(ndoms(op,2))
 y2 = op[p]*x[p]
 
-@test AbstractOperators.deepvecnorm(y .- y2) <= 1e-8
+@test AbstractOperators.blockvecnorm(y .- y2) <= 1e-8
 
 # test Scale of Sum
 
diff --git a/test/utils.jl b/test/utils.jl
@@ -4,23 +4,23 @@ function test_op(A::AbstractOperator, x, y, verb::Bool = false)
   verb && (println(); show(A); println())
 
   Ax = A*x
-  Ax2 = AbstractOperators.deepsimilar(Ax)
+  Ax2 = AbstractOperators.blocksimilar(Ax)
   verb && println("forward preallocated")
   A_mul_B!(Ax2, A, x) #verify in-place linear operator works
   verb && @time A_mul_B!(Ax2, A, x)
 
-  @test AbstractOperators.deepvecnorm(Ax .- Ax2) <= 1e-8
+  @test AbstractOperators.blockvecnorm(Ax .- Ax2) <= 1e-8
 
   Acy = A'*y
-  Acy2 = AbstractOperators.deepsimilar(Acy)
+  Acy2 = AbstractOperators.blocksimilar(Acy)
   verb && println("adjoint preallocated")
   Ac_mul_B!(Acy2, A, y) #verify in-place linear operator works
   verb && @time Ac_mul_B!(Acy2, A, y)
 
-  @test AbstractOperators.deepvecnorm(Acy .- Acy2) <= 1e-8
+  @test AbstractOperators.blockvecnorm(Acy .- Acy2) <= 1e-8
 
-  s1 = real(AbstractOperators.deepvecdot(Ax2, y))
-  s2 = real(AbstractOperators.deepvecdot(x, Acy2))
+  s1 = real(AbstractOperators.blockvecdot(Ax2, y))
+  s2 = real(AbstractOperators.blockvecdot(x, Acy2))
   @test abs( s1 - s2 ) < 1e-8
 
   return Ax
@@ -32,31 +32,31 @@ function test_NLop(A::AbstractOperator, x, y, verb::Bool = false)
 	verb && (println(),println(A))
 
 	Ax = A*x
-	Ax2 = AbstractOperators.deepsimilar(Ax)
+	Ax2 = AbstractOperators.blocksimilar(Ax)
 	verb && println("forward preallocated")
 	A_mul_B!(Ax2, A, x) #verify in-place linear operator works
 	verb && @time A_mul_B!(Ax2, A, x)
 
 	@test_throws ErrorException A'
 
-	@test AbstractOperators.deepvecnorm(Ax .- Ax2) <= 1e-8
+	@test AbstractOperators.blockvecnorm(Ax .- Ax2) <= 1e-8
 
 	J = Jacobian(A,x)
 	verb && println(J)
 
 	grad = J'*y
 	A_mul_B!(Ax2, A, x) #redo forward
 	verb && println("jacobian Ac_mul_B! preallocated")
-	grad2 = AbstractOperators.deepsimilar(grad)
+	grad2 = AbstractOperators.blocksimilar(grad)
 	Ac_mul_B!(grad2, J, y) #verify in-place linear operator works
 	verb && A_mul_B!(Ax2, A, x) #redo forward
 	verb && @time Ac_mul_B!(grad2, J, y) 
 
-	@test AbstractOperators.deepvecnorm(grad .- grad2) < 1e-8
+	@test AbstractOperators.blockvecnorm(grad .- grad2) < 1e-8
 
 	grad3 = gradient_fd(A,Ax,x,y) #calculate gradient using finite differences
 
-	@test AbstractOperators.deepvecnorm(grad .- grad3) < 1e-4
+	@test AbstractOperators.blockvecnorm(grad .- grad3) < 1e-4
 
 	return Ax, grad
 end