Add Newton-type simultaneous diagonalization (#41)

blegat · web-flow · commit 8f564bbc9ecf · 2023-06-29T09:38:04.000+02:00
* Add Newton-type simultaneous diagonalization

* Fix format
diff --git a/src/SemialgebraicSets.jl b/src/SemialgebraicSets.jl
@@ -2,8 +2,7 @@ module SemialgebraicSets
 
 using Random
 
-import MutableArithmetics
-const MA = MutableArithmetics
+import MutableArithmetics as MA
 
 using MultivariatePolynomials
 const MP = MultivariatePolynomials
@@ -58,6 +57,7 @@ Base.intersect(set::AbstractSemialgebraicSet; kws...) = set
 
 include("groebner.jl")
 include("ideal.jl")
+include("multiplication_matrices.jl")
 include("solve.jl")
 include("variety.jl")
 include("basic.jl")
diff --git a/src/multiplication_matrices.jl b/src/multiplication_matrices.jl
@@ -0,0 +1,34 @@
+export ReorderedSchurMultiplicationMatricesSolver
+
+struct MultiplicationMatrices{Ms} # wrapper used to dispatch `solve` on multiplication matrices
+    matrices::Ms # collection of matrices; `solve` draws one random weight per matrix
+end
+
+"""
+    AbstractMultiplicationMatricesSolver
+
+Solver of algebraic equations using multiplication matrices. `solve` reads the field `rng` of the solver and calls `_solve_multiplication_matrices(matrices, λ, solver)`.
+"""
+abstract type AbstractMultiplicationMatricesSolver end
+
+# Draw from `solver.rng` one random weight per multiplication matrix, scale the
+# weights so that they sum to one and run the deterministic part of `solver`.
+function solve(Ms::MultiplicationMatrices, solver::AbstractMultiplicationMatricesSolver)
+    matrices = Ms.matrices
+    weights = rand(solver.rng, length(matrices))
+    weights = weights / sum(weights)
+    return _solve_multiplication_matrices(matrices, weights, solver)
+end
+
+include("schur.jl")
+include("newton_type.jl")
+
+# Default solver for a vector of elements of type `_APL{T}`: the type parameter
+# `T` is read from the element type and forwarded to the method below.
+function default_multiplication_matrices_solver(::AbstractVector{<:_APL{T}}) where {T}
+    return default_multiplication_matrices_solver(T)
+end
+
+# Default solver for the type `T`
+default_multiplication_matrices_solver(::Type{T}) where {T} =
+    ReorderedSchurMultiplicationMatricesSolver{T}()
diff --git a/src/newton_type.jl b/src/newton_type.jl
@@ -0,0 +1,137 @@
+# This file is largely inspired by Bernard Mourrain's MultivariateSeries/diagonalization.jl
+
+export NewtonTypeDiagonalization
+
+# Norm of the off-diagonal terms of a square matrix `M`, that is, the square
+# root of the sum of `abs2` of its off-diagonal entries. Returns `0.0` if `M`
+# has less than two rows as the sum below would then be empty.
+# The size check is on `M` and not on `M[1]`: `M[1]` is a single entry so
+# `size(M[1], 1)` is `1` and the off-diagonal terms would always be ignored.
+function norm_off(M)
+    n = size(M, 1)
+    if n > 1
+        return sqrt(sum(abs2(M[i, j]) + abs2(M[j, i]) for i in 1:n for j in i+1:n))
+    else
+        return 0.0
+    end
+end
+
+"""
+    diagonalization_iter(D::Vector{<:AbstractMatrix{T}}) where {T}
+
+Given the vector `D` of `[F_i * E_i, F_i * M_1 * E_i, ..., F_i * M_p * E_i]`,
+return the matrices `X` and `Y` corresponding respectively to ``(I_n + X_i)``
+and ``(I_n + Y_i)`` of [KMY22, Theorem 5], solving equations [KMY22, (26)-(29)].
+Both have ones on the diagonal; the other entries solve one linear system each.
+"""
+function diagonalization_iter(D::Vector{<:AbstractMatrix{T}}) where {T}
+    dim = LinearAlgebra.checksquare(D[1])
+    nmat = length(D)
+
+    X = zeros(T, dim, dim)
+    Y = zeros(T, dim, dim)
+    # Buffers of the `nmat × 2` system, overwritten for every pair `(row, col)`
+    lhs = zeros(T, nmat, 2)
+    rhs = zeros(T, nmat)
+    for row in 1:dim, col in 1:dim
+        if row == col
+            # Diagonal entries are fixed to one
+            X[row, row] = 1
+            Y[row, row] = 1
+            continue
+        end
+        for (k, Dk) in enumerate(D)
+            lhs[k, 1] = Dk[row, row]
+            lhs[k, 2] = Dk[col, col]
+            rhs[k] = -Dk[row, col]
+        end
+        # Solve `lhs * [X[row, col], Y[row, col]] = rhs`
+        sol = lhs \ rhs
+        X[row, col] = sol[1]
+        Y[row, col] = sol[2]
+    end
+
+    return X, Y
+end
+
+"""
+    struct NewtonTypeDiagonalization{T,RNGT} <: AbstractMultiplicationMatricesSolver
+        max_iter::Int
+        ε::T
+        tol::T
+        rng::RNGT
+    end
+
+Simultaneous diagonalization of commuting matrices using the method of [KMY22, Theorem 5].
+
+[KMY22] Khouja, Rima, Mourrain, Bernard, and Yakoubsohn, Jean-Claude.
+*Newton-type methods for simultaneous matrix diagonalization.*
+Calcolo 59.4 (2022): 38.
+"""
+struct NewtonTypeDiagonalization{T,RNGT} <: AbstractMultiplicationMatricesSolver
+    max_iter::Int # maximal number of Newton-type iterations
+    ε::T # the iterations stop once the off-diagonal norm decreases by at most `ε`
+    tol::T # no iteration is run if the relative off-diagonal norm is at most `tol`
+    rng::RNGT # random number generator used by `solve` to draw the weights
+end
+# Constructor using `Random.GLOBAL_RNG` as `rng`
+function NewtonTypeDiagonalization(max_iter, ε, tol)
+    return NewtonTypeDiagonalization(max_iter, ε, tol, Random.GLOBAL_RNG)
+end
+NewtonTypeDiagonalization() = NewtonTypeDiagonalization(10, 1e-3, 5e-2) # values used in MultivariateSeries/diagonalization.jl
+
+# Orthonormal Schur vectors `E` of `M` (so `E'` is the inverse of `E`); `eigvecs`
+# failed some tests with a non-invertible basis. The fallback used to return the
+# function `LinearAlgebra.eigvecs` itself; `BigFloat` is factorized in `Float64`.
+_eigvecs(M::AbstractMatrix) = LinearAlgebra.schur(M).vectors
+function _eigvecs(M::AbstractMatrix{BigFloat})
+    return convert(Matrix{BigFloat}, LinearAlgebra.schur(Float64.(M)).vectors)
+end
+# Solve using the Newton-type simultaneous diagonalization of [KMY22]
+function _solve_multiplication_matrices(M, λ, solver::NewtonTypeDiagonalization)
+    @assert length(M) == length(λ)
+    n = length(λ)
+    r = LinearAlgebra.checksquare(M[1])
+    # Initial basis `E` computed from the combination of the matrices weighted by `λ`
+    M1 = sum(λ .* M)
+    E = _eigvecs(M1)
+
+    # With `eigvecs`, we should do `inv` but with `schur` we can just transpose
+    #F = inv(E)
+    F = E'
+
+    D = vcat(
+        # Add one matrix for the equation `F_i * E_i = I`
+        # constraining `E_i` to be invertible
+        [Matrix{eltype(M[1])}(I, r, r)],
+        [F * M[i] * E for i in eachindex(M)],
+    )
+    err = sum(norm_off.(D))
+    Δ = sum(norm.(D))
+    # `nit` counts the iterations; none is run if `err` relative to `Δ` is at most `solver.tol`
+    nit = 0
+
+    if err / Δ > solver.tol
+        Δ = err # so that the first iteration is run whenever `err > solver.ε`
+        while nit < solver.max_iter && Δ > solver.ε
+            err0 = err
+            X, Y = diagonalization_iter(D)
+            # From [KMY22, Theorem 5]
+            # Z_{i,k} + ∑_{i,k}
+            # = F_i * M_k * E_i
+            # = (I_n + Y_i) * (F_{i-1} * M_k * E_{i-1}) * (I_n + X_i)
+            # = (I_n + Y_i) * D[i] * (I_n + X_i)
+            # = Y * D[i] * X
+            D = [Y * D[i] * X for i in eachindex(D)]
+            # E_{i+1} = E_i * (I_n + X_i) from [KMY22, Theorem 5]
+            E = E * X
+            # F_{i+1} = (I_n + Y_i) * F_i from [KMY22, Theorem 5]
+            F = Y * F # NOTE(review): `E` and `F` are not read by the `return` below
+            nit += 1
+            err = sum(norm_off.(D))
+            Δ = err0 - err # decrease of the off-diagonal norm during this iteration
+        end
+    end
+    # Coordinate `j` of solution `i`: ratio of the `i`th diagonal entries of `D[j+1]` and `D[1]`
+    return [[D[j+1][i, i] / D[1][i, i] for j in 1:n] for i in 1:r]
+end
diff --git a/src/schur.jl b/src/schur.jl
@@ -51,7 +51,7 @@ function _clusterordschur(M::AbstractMatrix{<:Real}, ɛ)
     atol = A[]
     # condition_number requires that conjugate pair need to be treated together so we first need to handle them
     # If they are in the same cluster then pair them, otherwise it is complex solution so we reject them
-    i = 1
+    i = firstindex(v)
     while i <= lastindex(v)
         if isreal(v[i])
             push!(clusters, [i])
@@ -107,3 +107,51 @@ function _clusterordschur(M::AbstractMatrix{<:Real}, ɛ)
     end
     return Z, clusters
 end
+
+"""
+    struct ReorderedSchurMultiplicationMatricesSolver{T,RNGT<:Random.AbstractRNG} <:
+        AbstractMultiplicationMatricesSolver
+        ɛ::T
+        rng::RNGT
+    end
+
+Simultaneous diagonalization of commuting matrices using the method of [CGT97].
+
+[CGT97] Corless, R. M.; Gianni, P. M. & Trager, B. M.
+*A reordered Schur factorization method for zero-dimensional polynomial systems with multiple roots*
+Proceedings of the 1997 international symposium on Symbolic and algebraic computation, 1997, 133-140
+"""
+struct ReorderedSchurMultiplicationMatricesSolver{T,RNGT<:Random.AbstractRNG} <:
+       AbstractMultiplicationMatricesSolver
+    ɛ::T # tolerance passed to `clusterordschur`
+    rng::RNGT # random number generator used by `solve` to draw the weights `λ`
+end
+# Use `Random.GLOBAL_RNG` when no random number generator is given
+ReorderedSchurMultiplicationMatricesSolver(ɛ) =
+    ReorderedSchurMultiplicationMatricesSolver(ɛ, Random.GLOBAL_RNG)
+# Default tolerance: `Base.rtoldefault` of `real(T)`
+ReorderedSchurMultiplicationMatricesSolver{T}() where {T} =
+    ReorderedSchurMultiplicationMatricesSolver(Base.rtoldefault(real(T)))
+
+# Deterministic part: given the weights `λ`, every cluster returned by
+# `clusterordschur` for `sum(λ .* Ms)` gives one solution whose `i`th coordinate
+# is the average of `dot(q, Ms[i] * q)` over the columns `q` of `Z` in the cluster.
+function _solve_multiplication_matrices(
+    Ms::AbstractVector{<:AbstractMatrix{T}},
+    λ,
+    solver::ReorderedSchurMultiplicationMatricesSolver,
+) where {T<:Real}
+    @assert length(Ms) == length(λ)
+    nvars = length(λ)
+    Z, clusters = clusterordschur(sum(λ .* Ms), solver.ɛ)
+    vals = [zeros(T, nvars) for _ in clusters]
+    for (sol, cluster) in zip(vals, clusters)
+        for col in cluster
+            q = Z[:, col]
+            for i in 1:nvars
+                sol[i] += dot(q, Ms[i] * q) / length(cluster)
+            end
+        end
+    end
+    return vals
+end
diff --git a/src/solve.jl b/src/solve.jl
@@ -1,4 +1,4 @@
-export algebraic_solver, ReorderedSchurMultiplicationMatricesSolver
+export algebraic_solver
 
 """
     AbstractAlgebraicSolver
@@ -34,13 +34,6 @@ Returns a nullable which is `null` if `V` is not zero-dimensional and is the lis
 """
 function multiplication_matrices end
 
-"""
-    AbstractMultiplicationMatricesSolver
-
-Solver of algebraic equations using multiplication matrices.
-"""
-abstract type AbstractMultiplicationMatricesSolver end
-
 struct SolverUsingMultiplicationMatrices{
     A<:AbstractMultiplicationMatricesAlgorithm,
     S<:AbstractMultiplicationMatricesSolver,
@@ -58,10 +51,6 @@ function solve(V, solver::SolverUsingMultiplicationMatrices)
     end
 end
 
-struct MultiplicationMatrices{Ms}
-    matrices::Ms
-end
-
 struct GröbnerBasisMultiplicationMatricesAlgorithm <:
        AbstractMultiplicationMatricesAlgorithm end
 
@@ -94,55 +83,6 @@ function multiplication_matrices(
     end
 end
 
-include("schur.jl")
-
-"""
-Corless, R. M.; Gianni, P. M. & Trager, B. M. A reordered Schur factorization method for zero-dimensional polynomial systems with multiple roots Proceedings of the 1997 international symposium on Symbolic and algebraic computation, 1997, 133-140
-"""
-struct ReorderedSchurMultiplicationMatricesSolver{T,RNGT<:Random.AbstractRNG} <:
-       AbstractMultiplicationMatricesSolver
-    ɛ::T
-    rng::RNGT
-end
-function ReorderedSchurMultiplicationMatricesSolver(ɛ)
-    return ReorderedSchurMultiplicationMatricesSolver(ɛ, Random.GLOBAL_RNG)
-end
-function ReorderedSchurMultiplicationMatricesSolver{T}() where {T}
-    return ReorderedSchurMultiplicationMatricesSolver(Base.rtoldefault(real(T)))
-end
-
-function solve(
-    Ms::MultiplicationMatrices,
-    solver::ReorderedSchurMultiplicationMatricesSolver,
-)
-    λ = rand(solver.rng, length(Ms.matrices))
-    λ /= sum(λ)
-    return _solve_multiplication_matrices(Ms.matrices, λ, solver)
-end
-
-# Deterministic part
-function _solve_multiplication_matrices(
-    Ms::AbstractVector{<:AbstractMatrix{T}},
-    λ,
-    solver::ReorderedSchurMultiplicationMatricesSolver,
-) where {T<:Real}
-    @assert length(Ms) == length(λ)
-    n = length(λ)
-    Z, clusters = clusterordschur(sum(λ .* Ms), solver.ɛ)
-    r = length(clusters)
-    vals = [zeros(T, n) for k in 1:r]
-    for k in 1:r
-        nk = length(clusters[k])
-        for j in clusters[k]
-            q = Z[:, j]
-            for i in 1:n
-                vals[k][i] += dot(q, Ms[i] * q) / nk
-            end
-        end
-    end
-    return vals
-end
-
 function algebraic_solver(
     algo::AbstractMultiplicationMatricesAlgorithm,
     solver::AbstractMultiplicationMatricesSolver,
@@ -160,14 +100,6 @@ end
 function default_multiplication_matrices_algorithm(p)
     return GröbnerBasisMultiplicationMatricesAlgorithm()
 end
-function default_multiplication_matrices_solver(::Type{T}) where {T}
-    return ReorderedSchurMultiplicationMatricesSolver{T}()
-end
-function default_multiplication_matrices_solver(
-    ::AbstractVector{PT},
-) where {T,PT<:_APL{T}}
-    return default_multiplication_matrices_solver(T)
-end
 
 function default_algebraic_solver(p)
     return algebraic_solver(
diff --git a/test/solve.jl b/test/solve.jl