Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ext/TensorKitChainRulesCoreExt/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ function ChainRulesCore.rrule(::typeof(⊗), A::AbstractTensorMap, B::AbstractTe
ipA = (codomainind(A), domainind(A))
pB = (allind(B), ())
dA = zerovector(A, promote_contract(scalartype(ΔC), scalartype(B)))
tB = _twist_nocopy(B, filter(x -> isdual(space(B, x)), allind(B)))
tB = twist(B, filter(x -> isdual(space(B, x)), allind(B)); copy = false)
dA = tensorcontract!(dA, ΔC, pΔC, false, tB, pB, true, ipA)
return projectA(dA)
end
dB_ = @thunk let
ipB = (codomainind(B), domainind(B))
pA = ((), allind(A))
dB = zerovector(B, promote_contract(scalartype(ΔC), scalartype(A)))
tA = _twist_nocopy(A, filter(x -> isdual(space(A, x)), allind(A)))
tA = twist(A, filter(x -> isdual(space(A, x)), allind(A)); copy = false)
dB = tensorcontract!(dB, tA, pA, true, ΔC, pΔC, false, ipB)
return projectB(dB)
end
Expand Down
98 changes: 55 additions & 43 deletions ext/TensorKitChainRulesCoreExt/tensoroperations.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# The rrules below only have to build tangents for the scalar coefficients α and β when
# those coefficients can actually carry one. Deciding this from the type alone gives a
# type-stable way to bail out early:
#   * values are forwarded to the method for their type,
#   * generic `Number` types need a tangent,
#   * `Integer` types and the `One` / `Zero` types do not.
function _needs_tangent(x)
    return _needs_tangent(typeof(x))
end
function _needs_tangent(::Type{<:Number})
    return true
end
function _needs_tangent(::Type{<:Integer})
    return false
end
function _needs_tangent(::Type{<:Union{One, Zero}})
    return false
end

function ChainRulesCore.rrule(
::typeof(TensorOperations.tensoradd!),
C::AbstractTensorMap,
Expand All @@ -13,32 +20,36 @@ function ChainRulesCore.rrule(

function pullback(ΔC′)
ΔC = unthunk(ΔC′)
dC = @thunk projectC(scale(ΔC, conj(β)))
dC = β === Zero() ? ZeroTangent() : @thunk projectC(scale(ΔC, conj(β)))
dA = @thunk let
ipA = invperm(linearize(pA))
pdA = _repartition(ipA, A)
TA = promote_add(ΔC, α)
# TODO: allocator
_dA = tensoralloc_add(TA, ΔC, pdA, conjA, Val(false))
_dA = tensoradd!(_dA, ΔC, pdA, conjA, conjA ? α : conj(α), Zero(), ba...)
return projectA(_dA)
projectA(_dA)
end
dα = @thunk let
# TODO: this is an inner product implemented as a contraction
# for non-symmetric tensors this might be more efficient like this,
# but for symmetric tensors an intermediate object will anyways be created
# and then it might be more efficient to use an addition and inner product
tΔC = _twist_nocopy(ΔC, filter(x -> isdual(space(ΔC, x)), allind(ΔC)))
_dα = tensorscalar(
tensorcontract(
A, ((), linearize(pA)), !conjA,
tΔC, (trivtuple(TO.numind(pA)), ()), false,
((), ()), One(), ba...
dα = if _needs_tangent(α)
@thunk let
# TODO: this is an inner product implemented as a contraction
# for non-symmetric tensors this might be more efficient like this,
# but for symmetric tensors an intermediate object will anyways be created
# and then it might be more efficient to use an addition and inner product
tΔC = twist(ΔC, filter(x -> isdual(space(ΔC, x)), allind(ΔC)); copy = false)
_dα = tensorscalar(
tensorcontract(
A, ((), linearize(pA)), !conjA,
tΔC, (trivtuple(TO.numind(pA)), ()), false,
((), ()), One(), ba...
)
)
)
return projectα(_dα)
projectα(_dα)
end
else
ZeroTangent()
end
dβ = @thunk projectβ(inner(C, ΔC))
dβ = _needs_tangent(β) ? @thunk(projectβ(inner(C, ΔC))) : ZeroTangent()
dba = map(_ -> NoTangent(), ba)
return NoTangent(), dC, dA, NoTangent(), NoTangent(), dα, dβ, dba...
end
Expand Down Expand Up @@ -67,19 +78,19 @@ function ChainRulesCore.rrule(
ipAB = invperm(linearize(pAB))
pΔC = _repartition(ipAB, TO.numout(pA))

dC = @thunk projectC(scale(ΔC, conj(β)))
dC = β === Zero() ? ZeroTangent() : @thunk projectC(scale(ΔC, conj(β)))
dA = @thunk let
ipA = _repartition(invperm(linearize(pA)), A)
conjΔC = conjA
conjB′ = conjA ? conjB : !conjB
TA = promote_contract(scalartype(ΔC), scalartype(B), scalartype(α))
# TODO: allocator
tB = _twist_nocopy(
tB = twist(
B,
TupleTools.vcat(
filter(x -> !isdual(space(B, x)), pB[1]),
filter(x -> isdual(space(B, x)), pB[2])
)
); copy = false
)
_dA = tensoralloc_contract(
TA, ΔC, pΔC, conjΔC, tB, reverse(pB), conjB′, ipA, Val(false)
Expand All @@ -91,20 +102,20 @@ function ChainRulesCore.rrule(
ipA,
conjA ? α : conj(α), Zero(), ba...
)
return projectA(_dA)
projectA(_dA)
end
dB = @thunk let
ipB = _repartition(invperm(linearize(pB)), B)
conjΔC = conjB
conjA′ = conjB ? conjA : !conjA
TB = promote_contract(scalartype(ΔC), scalartype(A), scalartype(α))
# TODO: allocator
tA = _twist_nocopy(
tA = twist(
A,
TupleTools.vcat(
filter(x -> isdual(space(A, x)), pA[1]),
filter(x -> !isdual(space(A, x)), pA[2])
)
); copy = false
)
_dB = tensoralloc_contract(
TB, tA, reverse(pA), conjA′, ΔC, pΔC, conjΔC, ipB, Val(false)
Expand All @@ -116,14 +127,18 @@ function ChainRulesCore.rrule(
ipB,
conjB ? α : conj(α), Zero(), ba...
)
return projectB(_dB)
projectB(_dB)
end
dα = @thunk let
# TODO: this result should be AB = (C′ - βC) / α as C′ = βC + αAB
AB = tensorcontract(A, pA, conjA, B, pB, conjB, pAB, One(), ba...)
return projectα(inner(AB, ΔC))
dα = if _needs_tangent(α)
@thunk let
# TODO: this result should be AB = (C′ - βC) / α as C′ = βC + αAB
AB = tensorcontract(A, pA, conjA, B, pB, conjB, pAB, One(), ba...)
projectα(inner(AB, ΔC))
end
else
ZeroTangent()
end
dβ = @thunk projectβ(inner(C, ΔC))
dβ = _needs_tangent(β) ? @thunk(projectβ(inner(C, ΔC))) : ZeroTangent()
dba = map(_ -> NoTangent(), ba)
return NoTangent(), dC,
dA, NoTangent(), NoTangent(),
Expand All @@ -149,7 +164,7 @@ function ChainRulesCore.rrule(

function pullback(ΔC′)
ΔC = unthunk(ΔC′)
dC = @thunk projectC(scale(ΔC, conj(β)))
dC = β === Zero() ? ZeroTangent() : @thunk projectC(scale(ΔC, conj(β)))
dA = @thunk let
ip = invperm((linearize(p)..., q[1]..., q[2]...))
pdA = _repartition(ip, A)
Expand All @@ -163,15 +178,19 @@ function ChainRulesCore.rrule(
_dA = tensorproduct!(
_dA, ΔC, pΔC, conjA, E, pE, conjA, pdA, conjA ? α : conj(α), Zero(), ba...
)
return projectA(_dA)
projectA(_dA)
end
dα = @thunk let
# TODO: this result might be easier to compute as:
# C′ = βC + α * trace(A) ⟹ At = (C′ - βC) / α
At = tensortrace(A, p, q, conjA)
return projectα(inner(At, ΔC))
dα = if _needs_tangent(α)
@thunk let
# TODO: this result might be easier to compute as:
# C′ = βC + α * trace(A) ⟹ At = (C′ - βC) / α
At = tensortrace(A, p, q, conjA)
projectα(inner(At, ΔC))
end
else
ZeroTangent()
end
dβ = @thunk projectβ(inner(C, ΔC))
dβ = _needs_tangent(β) ? @thunk(projectβ(inner(C, ΔC))) : ZeroTangent()
dba = map(_ -> NoTangent(), ba)
return NoTangent(), dC, dA, NoTangent(), NoTangent(), NoTangent(), dα, dβ, dba...
end
Expand All @@ -188,10 +207,3 @@ function ChainRulesCore.rrule(::typeof(TensorKit.scalar), t::AbstractTensorMap)
end
return val, scalar_pullback
end

# Stop-gap wrapper around `twist` that avoids copies when they are not needed: `t` itself
# is returned, without calling `twist`, when the braiding style of its sector type is
# `Bosonic` or when `inds` is empty. Otherwise the call is forwarded to `twist` together
# with any keyword arguments.
# TODO: delete this helper once `twist(t; copy=false)` is defined
function _twist_nocopy(t, inds; kwargs...)
    # Same short-circuit order as `isbosonic || isempty(inds)`, negated via De Morgan.
    twist_needed = !(BraidingStyle(sectortype(t)) isa Bosonic) && !isempty(inds)
    if twist_needed
        return twist(t, inds; kwargs...)
    else
        return t
    end
end