Implement remaining factorization rrules

lkdvos · lkdvos · commit 648a34a01db3 · 2025-06-16T20:11:46.000-04:00
diff --git a/ext/TensorKitChainRulesCoreExt/TensorKitChainRulesCoreExt.jl b/ext/TensorKitChainRulesCoreExt/TensorKitChainRulesCoreExt.jl
@@ -14,7 +14,8 @@ using VectorInterface: promote_scale, promote_add
 
 using MatrixAlgebraKit
 using MatrixAlgebraKit: TruncationStrategy,
-                        svd_compact_pullback!, eig_full_pullback!, eigh_full_pullback!
+                        svd_compact_pullback!, eig_full_pullback!, eigh_full_pullback!,
+                        qr_compact_pullback!, lq_compact_pullback!
 
 include("utility.jl")
 include("constructors.jl")
diff --git a/ext/TensorKitChainRulesCoreExt/factorizations.jl b/ext/TensorKitChainRulesCoreExt/factorizations.jl
@@ -99,167 +99,38 @@ end
 function ChainRulesCore.rrule(::typeof(leftorth!), t::AbstractTensorMap; alg=QRpos())
     alg isa TensorKit.QR || alg isa TensorKit.QRpos ||
         error("only `alg=QR()` and `alg=QRpos()` are supported")
-    Q, R = leftorth(t; alg)
-    function leftorth!_pullback((_ΔQ, _ΔR))
-        ΔQ, ΔR = unthunk(_ΔQ), unthunk(_ΔR)
-        Δt = similar(t)
-        for (c, b) in blocks(Δt)
-            qr_pullback!(b, block(Q, c), block(R, c), block(ΔQ, c), block(ΔR, c))
+    QR = leftorth(t; alg)
+    function leftorth!_pullback(ΔQR′) # pullback: cotangents of (Q, R) -> cotangent of t
+        ΔQR = unthunk.(ΔQR′) # unthunk each component of the cotangent tuple
+        Δt = zerovector(t)
+        foreachblock(Δt) do c, (b,)
+            QRc = block.(QR, Ref(c)) # blocks of the factors for block key c
+            ΔQRc = block.(ΔQR, Ref(c)) # matching cotangent blocks
+            qr_compact_pullback!(b, QRc, ΔQRc) # presumably updates b in place (verify)
+            return nothing
         end
         return NoTangent(), Δt
     end
-    leftorth!_pullback(::Tuple{ZeroTangent,ZeroTangent}) = NoTangent(), ZeroTangent()
-    return (Q, R), leftorth!_pullback
+    leftorth!_pullback(::NTuple{2,ZeroTangent}) = NoTangent(), ZeroTangent()
+
+    return QR, leftorth!_pullback
 end
 
 function ChainRulesCore.rrule(::typeof(rightorth!), t::AbstractTensorMap; alg=LQpos())
     alg isa TensorKit.LQ || alg isa TensorKit.LQpos ||
         error("only `alg=LQ()` and `alg=LQpos()` are supported")
-    L, Q = rightorth(t; alg)
-    function rightorth!_pullback((_ΔL, _ΔQ))
-        ΔL, ΔQ = unthunk(_ΔL), unthunk(_ΔQ)
-        Δt = similar(t)
-        for (c, b) in blocks(Δt)
-            lq_pullback!(b, block(L, c), block(Q, c), block(ΔL, c), block(ΔQ, c))
+    LQ = rightorth(t; alg)
+    function rightorth!_pullback(ΔLQ′) # pullback: cotangents of (L, Q) -> cotangent of t
+        ΔLQ = unthunk.(ΔLQ′) # unthunk per component; the tuple itself is not a thunk
+        Δt = zerovector(t)
+        foreachblock(Δt) do c, (b,)
+            LQc = block.(LQ, Ref(c)) # blocks of the factors for block key c
+            ΔLQc = block.(ΔLQ, Ref(c)) # matching cotangent blocks
+            lq_compact_pullback!(b, LQc, ΔLQc) # presumably updates b in place (verify)
+            return nothing
         end
         return NoTangent(), Δt
     end
-    rightorth!_pullback(::Tuple{ZeroTangent,ZeroTangent}) = NoTangent(), ZeroTangent()
-    return (L, Q), rightorth!_pullback
-end
-
-# Corresponding matrix factorisations: implemented as mutating methods
-# ---------------------------------------------------------------------
-# helper routines
-safe_inv(a, tol) = abs(a) < tol ? zero(a) : inv(a)
-
-function lowertriangularind(A::AbstractMatrix)
-    m, n = size(A)
-    I = Vector{Int}(undef, div(m * (m - 1), 2) + m * (n - m))
-    offset = 0
-    for j in 1:n
-        r = (j + 1):m
-        I[offset .- j .+ r] = (j - 1) * m .+ r
-        offset += length(r)
-    end
-    return I
-end
-
-function uppertriangularind(A::AbstractMatrix)
-    m, n = size(A)
-    I = Vector{Int}(undef, div(m * (m - 1), 2) + m * (n - m))
-    offset = 0
-    for i in 1:m
-        r = (i + 1):n
-        I[offset .- i .+ r] = i .+ m .* (r .- 1)
-        offset += length(r)
-    end
-    return I
-end
-
-function qr_pullback!(ΔA::AbstractMatrix, Q::AbstractMatrix, R::AbstractMatrix, ΔQ, ΔR;
-                      tol::Real=default_pullback_gaugetol(R))
-    Rd = view(R, diagind(R))
-    p = something(findlast(≥(tol) ∘ abs, Rd), 0)
-    m, n = size(R)
-
-    Q1 = view(Q, :, 1:p)
-    R1 = view(R, 1:p, :)
-    R11 = view(R, 1:p, 1:p)
-
-    ΔA1 = view(ΔA, :, 1:p)
-    ΔQ1 = view(ΔQ, :, 1:p)
-    ΔR1 = view(ΔR, 1:p, :)
-
-    M = similar(R, (p, p))
-    ΔR isa AbstractZero || mul!(M, ΔR1, R1')
-    ΔQ isa AbstractZero || mul!(M, Q1', ΔQ1, -1, !(ΔR isa AbstractZero))
-    view(M, lowertriangularind(M)) .= conj.(view(M, uppertriangularind(M)))
-    if eltype(M) <: Complex
-        Md = view(M, diagind(M))
-        Md .= real.(Md)
-    end
-
-    ΔA1 .= ΔQ1
-    mul!(ΔA1, Q1, M, +1, 1)
-
-    if n > p
-        R12 = view(R, 1:p, (p + 1):n)
-        ΔA2 = view(ΔA, :, (p + 1):n)
-        ΔR12 = view(ΔR, 1:p, (p + 1):n)
-
-        if ΔR isa AbstractZero
-            ΔA2 .= zero(eltype(ΔA))
-        else
-            mul!(ΔA2, Q1, ΔR12)
-            mul!(ΔA1, ΔA2, R12', -1, 1)
-        end
-    end
-    if m > p && !(ΔQ isa AbstractZero) # case where R is not full rank
-        Q2 = view(Q, :, (p + 1):m)
-        ΔQ2 = view(ΔQ, :, (p + 1):m)
-        Q1dΔQ2 = Q1' * ΔQ2
-        Δgauge = norm(mul!(copy(ΔQ2), Q1, Q1dΔQ2, -1, 1), Inf)
-        Δgauge < tol ||
-            @warn "`qr` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
-        mul!(ΔA1, Q2, Q1dΔQ2', -1, 1)
-    end
-    rdiv!(ΔA1, UpperTriangular(R11)')
-    return ΔA
-end
-
-function lq_pullback!(ΔA::AbstractMatrix, L::AbstractMatrix, Q::AbstractMatrix, ΔL, ΔQ;
-                      tol::Real=default_pullback_gaugetol(L))
-    Ld = view(L, diagind(L))
-    p = something(findlast(≥(tol) ∘ abs, Ld), 0)
-    m, n = size(L)
-
-    L1 = view(L, :, 1:p)
-    L11 = view(L, 1:p, 1:p)
-    Q1 = view(Q, 1:p, :)
-
-    ΔA1 = view(ΔA, 1:p, :)
-    ΔQ1 = view(ΔQ, 1:p, :)
-    ΔL1 = view(ΔL, :, 1:p)
-
-    M = similar(L, (p, p))
-    ΔL isa AbstractZero || mul!(M, L1', ΔL1)
-    ΔQ isa AbstractZero || mul!(M, ΔQ1, Q1', -1, !(ΔL isa AbstractZero))
-    view(M, uppertriangularind(M)) .= conj.(view(M, lowertriangularind(M)))
-    if eltype(M) <: Complex
-        Md = view(M, diagind(M))
-        Md .= real.(Md)
-    end
-
-    ΔA1 .= ΔQ1
-    mul!(ΔA1, M, Q1, +1, 1)
-
-    if m > p
-        L21 = view(L, (p + 1):m, 1:p)
-        ΔA2 = view(ΔA, (p + 1):m, :)
-        ΔL21 = view(ΔL, (p + 1):m, 1:p)
-
-        if ΔL isa AbstractZero
-            ΔA2 .= zero(eltype(ΔA))
-        else
-            mul!(ΔA2, ΔL21, Q1)
-            mul!(ΔA1, L21', ΔA2, -1, 1)
-        end
-    end
-    if n > p && !(ΔQ isa AbstractZero) # case where R is not full rank
-        Q2 = view(Q, (p + 1):n, :)
-        ΔQ2 = view(ΔQ, (p + 1):n, :)
-        ΔQ2Q1d = ΔQ2 * Q1'
-        Δgauge = norm(mul!(copy(ΔQ2), ΔQ2Q1d, Q1, -1, 1))
-        Δgauge < tol ||
-            @warn "`lq` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
-        mul!(ΔA1, ΔQ2Q1d', Q2, -1, 1)
-    end
-    ldiv!(LowerTriangular(L11)', ΔA1)
-    return ΔA
-end
-
-function default_pullback_gaugetol(a)
-    n = norm(a, Inf)
-    return eps(eltype(n))^(3 / 4) * max(n, one(n))
+    rightorth!_pullback(::NTuple{2,ZeroTangent}) = NoTangent(), ZeroTangent()
+    return LQ, rightorth!_pullback
 end