
Commit eca22fd

Add buffer allocator
1 parent 5d5af38 commit eca22fd

4 files changed: +124 -49 lines changed

src/MPSKit.jl

Lines changed: 1 addition & 0 deletions

@@ -99,6 +99,7 @@ include("utility/logging.jl")
 using .IterativeLoggers
 include("utility/iterativesolvers.jl")

+include("utility/allocator.jl")
 include("utility/styles.jl")
 include("utility/periodicarray.jl")
 include("utility/windowarray.jl")

src/algorithms/derivatives/derivatives.jl

Lines changed: 3 additions & 3 deletions

@@ -222,10 +222,10 @@ Base.:*(h::LazySum{<:Union{DerivativeOrMultiplied}}, v) = h(v)
 Given an operator and vector, try to construct a more efficient representation of that operator for repeated application.
 This should always be used in conjunction with [`unprepare_operator!!`](@ref).
 """
-prepare_operator!!(O, backend::AbstractBackend = DefaultBackend(), allocator = DefaultAllocator()) = O
+prepare_operator!!(O, backend::AbstractBackend = DefaultBackend(), allocator = GrowingBuffer()) = O

 # to make benchmark scripts run
-prepare_operator!!(O, x::AbstractTensorMap, backend::AbstractBackend = DefaultBackend(), allocator = DefaultAllocator()) =
+prepare_operator!!(O, x::AbstractTensorMap, backend::AbstractBackend = DefaultBackend(), allocator = GrowingBuffer()) =
     prepare_operator!!(O, backend, allocator), x
-unprepare_operator!!(y, O, x, backend::AbstractBackend = DefaultBackend(), allocator = DefaultAllocator()) =
+unprepare_operator!!(y, O, x, backend::AbstractBackend = DefaultBackend(), allocator = GrowingBuffer()) =
     y
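For reference, a minimal usage sketch of how the prepare/unprepare pair with the new default allocator is meant to bracket repeated applications. This is not part of the diff; `h` and `v0` are hypothetical placeholders for a derivative operator and an MPS tensor:

    buffer = GrowingBuffer()
    h′, v = prepare_operator!!(h, v0, DefaultBackend(), buffer)    # returns the prepared operator and the vector
    for _ in 1:100
        v = h′(v)                                                  # repeated applications reuse the same scratch buffer
    end
    v = unprepare_operator!!(v, h′, v0, DefaultBackend(), buffer)  # undo the preparation (pass-through by default)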

src/algorithms/derivatives/mpo_derivatives.jl

Lines changed: 53 additions & 46 deletions

@@ -134,9 +134,11 @@ function prepare_operator!!(
         H::MPO_AC_Hamiltonian{<:MPSTensor, <:MPOTensor, <:MPSTensor},
         backend::AbstractBackend, allocator
     )
+    cp = checkpoint(allocator)
     @plansor backend = backend allocator = allocator begin
         GL_O[-1 -2; -4 -5 -3] := H.leftenv[-1 1; -4] * H.operators[1][1 -2; -5 -3]
     end
+    reset!(allocator, cp)
     leftenv = fuse_legs(TensorMap(GL_O), 0, 2)
     rightenv = TensorMap(H.rightenv)

@@ -147,66 +149,76 @@ function prepare_operator!!(
         H::MPO_AC2_Hamiltonian{<:MPSTensor, <:MPOTensor, <:MPOTensor, <:MPSTensor},
         backend::AbstractBackend, allocator
     )
+    cp = checkpoint(allocator)
     @plansor backend = backend allocator = allocator begin
         GL_O[-1 -2; -4 -5 -3] := H.leftenv[-1 1; -4] * H.operators[1][1 -2; -5 -3]
         O_GR[-1 -2 -3; -4 -5] := H.operators[2][-3 -5; -2 1] * H.rightenv[-1 1; -4]
     end
+    reset!(allocator, cp)
+
     leftenv = fuse_legs(GL_O isa TensorMap ? GL_O : TensorMap(GL_O), 0, 2)
     rightenv = fuse_legs(O_GR isa TensorMap ? O_GR : TensorMap(O_GR), 2, 0)
     return PrecomputedDerivative(leftenv, rightenv, backend, allocator)
 end


 function (H::PrecomputedDerivative)(x::AbstractTensorMap)
-    R_fused = fuse_legs(H.rightenv, 0, 2)
+    allocator = H.allocator
+    cp = checkpoint(allocator)
+
+    R_fused = fuse_legs(H.rightenv, 0, numin(x))
     x_fused = fuse_legs(x, numout(x), numin(x))

-    # xR = matrix_contract(R_fused, x_fused, 1, One(), H.backend, H.allocator; transpose = true)

     TC = TensorOperations.promote_contract(scalartype(x_fused), scalartype(R_fused))
     xR = TensorOperations.tensoralloc_contract(TC, x_fused, ((1,), (2,)), false, R_fused, ((1,), (2, 3)), false, ((1, 2), (3,)), Val(true), H.allocator)

-    structure_xR = TensorKit.fusionblockstructure(space(xR))
-    structure_R = TensorKit.fusionblockstructure(space(R_fused))
-
-    xblocks = blocks(x_fused)
-    for ((f₁, f₂), i1) in structure_xR.fusiontreeindices
-        sz, str, offset = structure_xR.fusiontreestructure[i1]
-        xr = TensorKit.Strided.StridedView(xR.data, sz, str, offset)
-
-        u = first(f₁.uncoupled)
-        x = TensorKit.Strided.StridedView(xblocks[u])
-        isempty(x) && (zerovector!(xr); continue)
-
-        if haskey(structure_R.fusiontreeindices, (f₁, f₂))
-            @inbounds i = structure_R.fusiontreeindices[(f₁, f₂)]
-            @inbounds sz, str, offset = structure_R.fusiontreestructure[i]
-            r = TensorKit.Strided.StridedView(R_fused.data, sz, str, offset)
-
-            if TensorOperations.isblascontractable(r, ((1,), (2, 3))) &&
-                    TensorOperations.isblasdestination(xr, ((1,), (2, 3)))
-                C = TensorKit.Strided.sreshape(xr, size(xr, 1), size(xr, 2) * size(xr, 3))
-                B = TensorKit.Strided.sreshape(r, size(r, 1), size(r, 2) * size(r, 3))
-                LinearAlgebra.BLAS.gemm!('N', 'N', one(TC), x, B, zero(TC), C)
-            elseif sz[2] < sz[3]
-                for k in axes(r, 2)
-                    C = xr[:, k, :]
-                    B = r[:, k, :]
-                    LinearAlgebra.BLAS.gemm!('N', 'N', one(TC), x, B, zero(TC), C)
-                end
-            else
-                for k in axes(r, 3)
-                    C = xr[:, :, k]
-                    B = r[:, :, k]
-                    LinearAlgebra.BLAS.gemm!('N', 'N', one(TC), x, B, zero(TC), C)
-                end
-            end
-        else
-            zerovector!(xr)
-        end
-    end
+    matrix_contract!(xR, R_fused, x_fused, 1, One(), Zero(), H.backend, H.allocator; transpose = true)
+
+    # structure_xR = TensorKit.fusionblockstructure(space(xR))
+    # structure_R = TensorKit.fusionblockstructure(space(R_fused))
+
+    # xblocks = blocks(x_fused)
+    # for ((f₁, f₂), i1) in structure_xR.fusiontreeindices
+    #     sz, str, offset = structure_xR.fusiontreestructure[i1]
+    #     xr = TensorKit.Strided.StridedView(xR.data, sz, str, offset)
+
+    #     u = first(f₁.uncoupled)
+    #     x = TensorKit.Strided.StridedView(xblocks[u])
+    #     isempty(x) && (zerovector!(xr); continue)
+
+    #     if haskey(structure_R.fusiontreeindices, (f₁, f₂))
+    #         @inbounds i = structure_R.fusiontreeindices[(f₁, f₂)]
+    #         @inbounds sz, str, offset = structure_R.fusiontreestructure[i]
+    #         r = TensorKit.Strided.StridedView(R_fused.data, sz, str, offset)
+
+    #         if TensorOperations.isblascontractable(r, ((1,), (2, 3))) &&
+    #                 TensorOperations.isblasdestination(xr, ((1,), (2, 3)))
+    #             C = TensorKit.Strided.sreshape(xr, size(xr, 1), size(xr, 2) * size(xr, 3))
+    #             B = TensorKit.Strided.sreshape(r, size(r, 1), size(r, 2) * size(r, 3))
+    #             LinearAlgebra.BLAS.gemm!('N', 'N', one(TC), x, B, zero(TC), C)
+    #         elseif sz[2] < sz[3]
+    #             for k in axes(r, 2)
+    #                 C = xr[:, k, :]
+    #                 B = r[:, k, :]
+    #                 LinearAlgebra.BLAS.gemm!('N', 'N', one(TC), x, B, zero(TC), C)
+    #             end
+    #         else
+    #             for k in axes(r, 3)
+    #                 C = xr[:, :, k]
+    #                 B = r[:, :, k]
+    #                 LinearAlgebra.BLAS.gemm!('N', 'N', one(TC), x, B, zero(TC), C)
+    #             end
+    #         end
+    #     else
+    #         zerovector!(xr)
+    #     end
+    # end

     LxR = H.leftenv * xR
+    TensorOperations.tensorfree!(xR, H.allocator)
+
+    reset!(allocator, cp)
     return TensorMap{scalartype(LxR)}(LxR.data, codomain(H.leftenv) ← domain(H.rightenv))
 end

@@ -219,8 +231,3 @@ const _ToPrepare = Union{
 function prepare_operator!!(H::Multiline{<:_ToPrepare}, backend::AbstractBackend, allocator)
     return Multiline(map(x -> prepare_operator!!(x, backend, allocator), parent(H)))
 end
-
-fixedpoint(A::Union{_ToPrepare, Multiline{<:_ToPrepare}}, x₀, which::Symbol, alg::Lanczos) =
-    fixedpoint(prepare_operator!!(A), x₀, which, alg)
-fixedpoint(A::Union{_ToPrepare, Multiline{<:_ToPrepare}}, x₀, which::Symbol, alg::Arnoldi) =
-    fixedpoint(prepare_operator!!(A), x₀, which, alg)
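The changes above all follow the same discipline: take a checkpoint of the allocator before a block of contractions that produces temporaries, and reset to that checkpoint once the temporaries are no longer needed, so every application of the derivative reuses one scratch region instead of allocating fresh tensors. A standalone sketch of that bracketing (illustrative only; `compute` stands in for the actual contractions, and the returned value is assumed not to live in the scratch buffer itself):

    function with_scratch(compute, allocator)
        cp = checkpoint(allocator)   # remember the current bump-pointer offset
        y = compute(allocator)       # may draw temporaries from `allocator`
        reset!(allocator, cp)        # roll back; the next call reuses the same memory
        return y                     # must own its data, not the scratch region
    end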

src/utility/allocator.jl

Lines changed: 67 additions & 0 deletions

@@ -0,0 +1,67 @@
+@static if isdefined(Core, :Memory)
+    BufType = Memory{UInt8}
+else
+    BufType = Vector{UInt8}
+end
+
+const DEFAULT_SIZEHINT = 2^20 # 1MB
+
+mutable struct GrowingBuffer
+    buffer::BufType
+    offset::UInt
+    function GrowingBuffer(; sizehint = DEFAULT_SIZEHINT)
+        buffer = BufType(undef, sizehint)
+        return new(buffer, zero(UInt))
+    end
+end
+
+Base.length(buffer::GrowingBuffer) = length(buffer.buffer)
+Base.pointer(buffer::GrowingBuffer) = pointer(buffer.buffer) + buffer.offset
+
+function Base.sizehint!(buffer::GrowingBuffer, n::Integer; shrink::Bool = false)
+    n > 0 || throw(ArgumentError("invalid new buffer size"))
+    buffer.offset == 0 || error("cannot resize a buffer that is not fully reset")
+
+    n = shrink ? max(n, length(buffer)) : n
+    n = Int(Base.nextpow(2, n))
+
+    @static if isdefined(Core, :Memory)
+        buffer.buffer = BufType(undef, n)
+    else
+        sizehint!(buffer.buffer, n)
+    end
+    return buffer
+end
+
+checkpoint(buffer) = zero(UInt)
+reset!(buffer, checkpoint::UInt = zero(UInt)) = buffer
+
+checkpoint(buffer::GrowingBuffer) = buffer.offset
+
+function reset!(buffer::GrowingBuffer, checkpoint::UInt = zero(UInt))
+    if iszero(checkpoint) && buffer.offset > length(buffer)
+        # full reset - check for need to grow
+        newlength = Base.nextpow(2, buffer.offset) # round to nearest larger power of 2
+        buffer.offset = checkpoint
+        sizehint!(buffer, newlength)
+    else
+        buffer.offset = checkpoint
+    end
+    return buffer
+end
+
+# Allocating
+# ----------
+function TensorOperations.tensoralloc(
+        ::Type{A}, structure, ::Val{istemp}, buffer::GrowingBuffer
+    ) where {A <: AbstractArray, istemp}
+    T = eltype(A)
+    if istemp
+        ptr = convert(Ptr{T}, pointer(buffer))
+        buffer.offset += prod(structure) * sizeof(T)
+        buffer.offset < length(buffer) &&
+            return Base.unsafe_wrap(Array, ptr, structure)
+    end
+    return A(undef, structure)
+end
+TensorOperations.tensorfree!(::AbstractArray, ::GrowingBuffer) = nothing
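To illustrate the intended behaviour of the new allocator (a sketch, not part of the commit; the sizes are arbitrary): temporary requests bump `offset`, a request that overflows the backing buffer falls back to an ordinary allocation while still advancing `offset`, and a later full `reset!` then grows the buffer to the next power of two above the peak offset.

    buf = GrowingBuffer(; sizehint = 2^12)   # 4 KiB backing buffer
    a = TensorOperations.tensoralloc(Vector{Float64}, (64,), Val(true), buf)    # 512 B, served from the buffer
    b = TensorOperations.tensoralloc(Vector{Float64}, (1024,), Val(true), buf)  # 8 KiB, overflows -> plain Vector
    TensorOperations.tensorfree!(a, buf)     # no-op; scratch memory is only reclaimed via reset!
    reset!(buf)                              # full reset; grows the buffer to 16 KiB since the peak offset exceeded 4 KiB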
