Fixes for Newton-type diagonalization (#42)

blegat · web-flow · commit ee000fb53ab9 · 2023-06-29T14:17:52.000+02:00
* Fixes for Newton-type diagonalization

* Fix format

* Fix build

* Fix format
diff --git a/src/cluster.jl b/src/cluster.jl
@@ -0,0 +1,79 @@
+"""
+    cluster_eigenvalues(_atol, v)
+
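+Cluster the values `v` following [CGT97], where `_atol(I)` gives the tolerance for the index (or vector of indices) `I` of `v`; return the clusters as a vector of vectors of indices of `v`.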
+
+[CGT97] Corless, R. M.; Gianni, P. M. & Trager, B. M.
+*A reordered Schur factorization method for zero-dimensional polynomial systems with multiple roots*
+Proceedings of the 1997 international symposium on Symbolic and algebraic computation, 1997, 133-140
+"""
+function cluster_eigenvalues(_atol, v)
+    A = typeof(_atol(1))
+    V = real(eltype(v))
+
+    ONE = one(one(V) / one(A))
+
+    clusters = Vector{Int}[]
+    λ = V[]
+    atol = A[]
+    # `condition_number` requires the two members of a conjugate pair to be treated together, so we handle them first.
+    # If both members of the pair are in the same cluster then pair them, otherwise it is a complex solution so we reject them.
+    i = firstindex(v)
+    while i <= lastindex(v)
+        if isreal(v[i])
+            push!(clusters, [i])
+            push!(λ, real(v[i]))
+            push!(atol, _atol(i))
+            i += 1
+        else
+            @assert i < lastindex(v) && !isreal(v[i+1])
+            pairatol = _atol([i, i + 1])
+            if abs(v[i] - v[i+1]) / pairatol < ONE
+                # Pair conjugate pairs into a real eigenvalue
+                push!(clusters, [i, i + 1])
+                push!(λ, real((v[i] + v[i+1]) / 2)) # The imaginary part should be zero anyway
+                push!(atol, pairatol)
+            end
+            i += 2
+        end
+    end
+
+    # For eigenvalues not clustered yet, their `atol` is quite large.
+    # Therefore, if we cluster all `i`, `j` that are close enough at once, we might cluster too much.
+    # The technique used here is to cluster only the closest pair at each iteration.
+    # Once they are matched, a new `atol` is computed and, if the cluster is complete,
+    # this `atol` will be small, which will avoid the addition of new eigenvalues.
+    while true
+        σ = sortperm(λ)
+        I = 0
+        J = 0
+        best = ONE
+        for _i in eachindex(σ)
+            i = σ[_i]
+            for _j in 1:(_i-1)
+                j = σ[_j]
+                d = abs(λ[i] - λ[j]) / min(atol[i], atol[j])
+                if d < best
+                    I = i
+                    J = j
+                    best = d
+                end
+            end
+        end
+        if best < ONE
+            # merge I with J
+            nI = length(clusters[I])
+            nJ = length(clusters[J])
+            λ[I] = (λ[I] * nI + λ[J] * nJ) / (nI + nJ)
+            append!(clusters[I], clusters[J])
+            atol[I] = _atol(clusters[I])
+            deleteat!(λ, J)
+            deleteat!(clusters, J)
+            deleteat!(atol, J)
+        else
+            break
+        end
+    end
+
+    return clusters
+end
diff --git a/src/multiplication_matrices.jl b/src/multiplication_matrices.jl
@@ -20,6 +20,7 @@ function solve(
     return _solve_multiplication_matrices(Ms.matrices, λ, solver)
 end
 
+include("cluster.jl")
 include("schur.jl")
 include("newton_type.jl")
 
diff --git a/src/newton_type.jl b/src/newton_type.jl
@@ -4,15 +4,13 @@ export NewtonTypeDiagonalization
 
 # norm of off diagonal terms of a square matrix
 function norm_off(M)
-    if size(M[1], 1) > 1
+    n = LinearAlgebra.checksquare(M)
+    if n > 1
         return sqrt(
-            sum(
-                abs2(M[i, j]) + abs2(M[j, i]) for i in 1:size(M, 1) for
-                j in i+1:size(M, 1)
-            ),
+            sum(abs2(M[i, j]) + abs2(M[j, i]) for i in 1:n for j in i+1:n),
         )
     else
-        return 0.0
+        return zero(eltype(M))
     end
 end
 
@@ -81,12 +79,12 @@ end
 NewtonTypeDiagonalization() = NewtonTypeDiagonalization(10, 1e-3, 5e-2)
 
 function _eigvecs(M::AbstractMatrix{BigFloat})
-    ev = LinearAlgebra.schur(Float64.(M)).vectors
-    # `eigvecs` is failing some tests with a non-invertible `ev`
-    #ev = LinearAlgebra.eigvecs(Float64.(M))
+    ev = _eigvecs(Float64.(M))
     return convert(Matrix{BigFloat}, ev)
 end
-_eigvecs(M::AbstractMatrix) = LinearAlgebra.eigvecs
+# `eigvecs` is failing some tests with a non-invertible `ev`
+#_eigvecs(M::AbstractMatrix) = LinearAlgebra.eigvecs(M)
+_eigvecs(M::AbstractMatrix) = LinearAlgebra.schur(M).vectors
 
 function _solve_multiplication_matrices(M, λ, solver::NewtonTypeDiagonalization)
     @assert length(M) == length(λ)
@@ -134,4 +132,30 @@ function _solve_multiplication_matrices(M, λ, solver::NewtonTypeDiagonalization
     end
 
     return [[D[j+1][i, i] / D[1][i, i] for j in 1:n] for i in 1:r]
+
+    #    # I implemented this when I was analysing the result after zero iterations, so I am unsure whether it is useful
+    #    d = LinearAlgebra.Diagonal(sqrt.(inv.(LinearAlgebra.diag(D[1]))))
+    #    Λ = [d * D[j+1] * d for j in 1:n]
+
+    #    # `Λ` can be decomposed into blocks corresponding to the same eigenvalue
+    #    # These blocks can have off-diagonal entries so we further diagonalize
+    #    # with
+    #    # FIXME `solver.tol` or `solver.ε` or ?
+    #    sub_solver = ReorderedSchurMultiplicationMatricesSolver(solver.ε, solver.rng)
+    #    sols = Vector{eltype(Λ[1])}[]
+    #    i = 1
+    #    while i <= r
+    #        j = findfirst((i+1):r) do j
+    #            all(1:n) do k
+    #                # FIXME `solver.tol` or `solver.ε` or ?
+    #                return !isapprox(Λ[k][j, j], Λ[k][i, i], rtol = solver.ε)
+    #            end
+    #        end
+    #        j = something(j, r - i + 1)
+    #        I = i:(i+j-1)
+    #        sub_matrices = MultiplicationMatrices([Λ[j][I, I] for j in 1:n])
+    #        append!(sols, solve(sub_matrices, sub_solver))
+    #        i += j
+    #    end
+    #    return sols
 end
diff --git a/src/schur.jl b/src/schur.jl
@@ -34,78 +34,12 @@ end
 function _clusterordschur(M::AbstractMatrix{<:Real}, ɛ)
     # M = Z * T * Z' and "values" gives the eigenvalues
     sf = LinearAlgebra.schur(M)
-    Z = sf.Z
-    v = sf.values
     # documentation says that the error on the eigenvalues is ɛ * norm(T) / condition_number
     nT = norm(sf.T)
-
-    _atol(I) = ɛ * nT / condition_number(sf, I)
-
-    A = typeof(_atol(1))
-    V = real(eltype(v))
-
-    ONE = one(one(V) / one(A))
-
-    clusters = Vector{Int}[]
-    λ = V[]
-    atol = A[]
-    # condition_number requires that conjugate pair need to be treated together so we first need to handle them
-    # If they are in the same cluster then pair them, otherwise it is complex solution so we reject them
-    i = firstindex(v)
-    while i <= lastindex(v)
-        if isreal(v[i])
-            push!(clusters, [i])
-            push!(λ, v[i])
-            push!(atol, _atol(i))
-            i += 1
-        else
-            @assert i < lastindex(v) && !isreal(v[i+1])
-            pairatol = _atol([i, i + 1])
-            if abs(v[i] - v[i+1]) / pairatol < ONE
-                # Pair conjugate pairs into a real eigenvalue
-                push!(clusters, [i, i + 1])
-                push!(λ, real((v[i] + v[i+1]) / 2)) # The imaginary part should be zero anyway
-                push!(atol, pairatol)
-            end
-            i += 2
-        end
-    end
-    σ = sortperm(λ)
-
-    # For eigenvalues not clustered yet, their eigenvalues is quite large.
-    # Therefore, if we cluster all i, j close enough at once we might cluster too much
-    # The technique used here is to cluster only the closest pair.
-    # Once they are matched, a new atol is computed and if the cluster is complete,
-    # this atol will be small which will avoid addition of new eigenvalues.
-    while true
-        I = 0
-        J = 0
-        best = ONE
-        for i in eachindex(clusters)
-            for j in 1:(i-1)
-                d = abs(λ[i] - λ[j]) / min(atol[i], atol[j])
-                if d < best
-                    I = i
-                    J = j
-                    best = d
-                end
-            end
-        end
-        if best < ONE
-            # merge I with J
-            nI = length(clusters[I])
-            nJ = length(clusters[J])
-            λ[I] = (λ[I] * nI + λ[J] * nJ) / (nI + nJ)
-            append!(clusters[I], clusters[J])
-            atol[I] = _atol(clusters[I])
-            deleteat!(λ, J)
-            deleteat!(clusters, J)
-            deleteat!(atol, J)
-        else
-            break
-        end
+    clusters = cluster_eigenvalues(sf.values) do I
+        return ɛ * nT / condition_number(sf, I)
     end
-    return Z, clusters
+    return sf.Z, clusters
 end
 
 """
@@ -143,7 +77,7 @@ function _solve_multiplication_matrices(
     n = length(λ)
     Z, clusters = clusterordschur(sum(λ .* Ms), solver.ɛ)
     r = length(clusters)
-    vals = [zeros(T, n) for k in 1:r]
+    vals = [zeros(T, n) for _ in 1:r]
     for k in 1:r
         nk = length(clusters[k])
         for j in clusters[k]
diff --git a/test/solve.jl b/test/solve.jl
@@ -26,8 +26,8 @@ end
         -1 0 1 -2 -1 0
         -1 0 1 -2 -2 -1
     ]
-    @test sort.(SemialgebraicSets.clusterordschur(A, sqrt(eps(Float64)))[2]) ==
-          [[2], [1, 5, 6]]
+    clusters = SemialgebraicSets.clusterordschur(A, sqrt(eps(Float64)))[2]
+    @test sort(sort.(clusters); by = first) == [[1, 5, 6], [2]]
 end
 
 function testelements(X, Y; atol = Base.rtoldefault(Float64), kwargs...)
@@ -61,39 +61,43 @@ newton_solver = NewtonTypeDiagonalization()
 
 function zero_dimensional_ideal(solver)
     Mod.@polyvar x y z
+
     V = @set x == y
     @test !is_zero_dimensional(V)
     @test_throws ErrorException iterate(V)
     @test_throws ErrorException length(V)
+
     V = @set 4x^2 == -5x && 3x^3 == 0 solver
     @test V.solver.solver === solver
     @test is_zero_dimensional(V)
     testelementstypes(V, [[0]])
+
     V = @set y == x^2 && z == x^3 solver
     @test !is_zero_dimensional(V)
-    if solver isa ReorderedSchurMultiplicationMatricesSolver
-        # FIXME NewtonType should cluster, it finds `(0, 0)` 3 times
+
+    if !(solver isa NewtonTypeDiagonalization)
         V = @set x^3 == 2x * y && x^2 * y == 2y^2 + x solver
         @test is_zero_dimensional(V)
         testelementstypes(V, [[0, 0]])
     end
+
     V = @set x == 1 solver
     @test is_zero_dimensional(V)
     testelementstypes(V, [[1]])
+
     V = @set x == 1 && y == 2 solver
     @test is_zero_dimensional(V)
     testelementstypes(V, [[1, 2]])
+
     V = @set x == 4 && y^2 == x solver
     @test is_zero_dimensional(V)
     testelementstypes(V, [[4, 2], [4, -2]])
+
     V = @set x^2 + x == 6 && y == x + 1 solver
     @test is_zero_dimensional(V)
     testelements(V, [[2, 3], [-3, -2]])
-    if solver isa ReorderedSchurMultiplicationMatricesSolver
-        # FIXME NewtonType finds:`
-        # [[2, √2], [-3, 0], [-3, 0], [2, -√2]]
-        # The `[-3, 0]` should be removed, it corresponds
-        # `[-3, √3 im]`, `[-3, -√3 im]` which is a complex pair
+
+    if !(solver isa NewtonTypeDiagonalization)
         V = @set x^2 + x == 6 && y^2 == x solver
         @test is_zero_dimensional(V)
         testelements(V, [[2, √2], [2, -√2]])
@@ -107,30 +111,37 @@ end
 
 function projective_zero_dimensional_ideal(solver)
     Mod.@polyvar x y z
+
     V = projective_algebraic_set([x - y], solver)
     @test is_zero_dimensional(V)
     testelementstypes(V, [[1, 1]])
+
     V = @set x + y == z solver
     V.projective = true
     @test !is_zero_dimensional(V)
+
     V = @set y == 2x solver
     V.projective = true
     @test is_zero_dimensional(V)
     testelementstypes(V, [[1, 2]])
+
     V = @set x + y == y solver
     V.projective = true
     @test is_zero_dimensional(V)
     testelementstypes(V, [[0, 1]])
+
     V = projective_algebraic_set([x + y - x])
     @test is_zero_dimensional(V)
-    return testelementstypes(V, [[1, 0]])
+    testelementstypes(V, [[1, 0]])
+    return
 end
 
 @testset "Projective zero-dimensional ideal" begin
     projective_zero_dimensional_ideal(schur_solver)
+    projective_zero_dimensional_ideal(newton_solver)
 end
 
-@testset "Example 5.1 of CGT97" begin
+function cgt96_e51(solver)
     ɛ = 1e-4
     Iɛ = [
         1-ɛ 0
@@ -150,24 +161,33 @@ end
         Z Iɛ
     ]
     α = 0.219
-    testelements(
+    return testelements(
         SemialgebraicSets._solve_multiplication_matrices(
             [A, B],
             [α, 1 - α],
-            ReorderedSchurMultiplicationMatricesSolver{Float64}(),
+            solver,
         ),
         [[1.0, -1.0], [1.0, 1.0], [-1.0, 1.0]];
         rtol = 1e-7,
     )
 end
 
-@testset "Example 4.3 of MD95" begin
+@testset "Example 5.1 of CGT97" begin
+    @testset "Schur" begin
+        cgt96_e51(schur_solver)
+    end
+    #    @testset "Newton" begin
+    #        cgt96_e51(newton_solver)
+    #    end
+end
+
+function md95_e43(solver)
     Mod.@polyvar x y
-    V = @set x^2 + 4y^4 == 4y^2 && 3x^4 + y^2 == 5x^3 schur_solver
+    V = @set x^2 + 4y^4 == 4y^2 && 3x^4 + y^2 == 5x^3 solver
     # This test is tricky because in the schur decomposition, the 4 last eigenvalues are e.g. 3.4e-7, -1.7e-7+3e-7im, -1.7e-7-3e-7im, -6e-16
     # the second and third do not seem that close but when the three first are averaged it is very close to zero.
     @test is_zero_dimensional(V)
-    testelementstypes(
+    return testelementstypes(
         V,
         [
             [0.66209555, 0.935259169],
@@ -179,6 +199,10 @@ end
     )
 end
 
+@testset "Example 4.3 of MD95" begin
+    md95_e43(schur_solver)
+end
+
 @testset "Example 5.2 of CGT97" begin
     Mod.@polyvar x y z
     V =