Update ksizeof for workspaces of block-Krylov solvers

amontoison · amontoison · commit 1069f7943194 · 2025-08-26T02:09:06.000-05:00
diff --git a/docs/src/storage.md b/docs/src/storage.md
@@ -149,4 +149,4 @@ Base.format_bytes(free_nbytes)  # Total free memory in RAM in bytes.
 
 !!! note
     - Beyond having faster operations, using low precisions, such as simple precision, allows to store more coefficients in RAM and solve larger linear problems.
-    - In the file [test_allocations.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl/blob/main/test/test_allocations.jl), we use the macro `@allocated` to test that we match the expected storage requirement of each method with a tolerance of 2%.
+    - In the file [test_allocations.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl/blob/main/test/test_allocations.jl), we use the macro `@allocated` to verify that the storage requirements of each Krylov solver match the expected values, within a tolerance of 2% (and 5% for block Krylov solvers). These tests are performed across the four main precisions: `Float32`, `Float64`, `ComplexF32`, and `ComplexF64`.
diff --git a/src/krylov_show.jl b/src/krylov_show.jl
@@ -1,9 +1,9 @@
 import Base.show, Base.sizeof, Base.format_bytes
 
 function ksizeof(attribute)
-  if isa(attribute, Vector{<:AbstractVector}) && !isempty(attribute)
-    # A vector of vectors is a vector of pointers in Julia.
-    # All vectors inside a vector have the same size in Krylov.jl
+  if isa(attribute, Vector{<:AbstractArray}) && !isempty(attribute)
+    # A vector of arrays is a vector of pointers in Julia.
+    # All arrays inside such a vector have the same size in Krylov.jl, so the size of the first one is used for every entry.
     size_attribute = sizeof(attribute) + length(attribute) * ksizeof(attribute[1])
   else
     size_attribute = sizeof(attribute)
diff --git a/test/test_allocations.jl b/test/test_allocations.jl
@@ -12,7 +12,7 @@
       b   = Ao * ones(FC, k)  # Dimension m
       c   = Au * ones(FC, n)  # Dimension k
       B   = A * Matrix{FC}(I, m, p)  # Dimension m × p
-      mem = 200
+      mem = 100
 
       T = real(FC)
       shifts = T[1; 2; 3; 4; 5]
@@ -472,7 +472,7 @@
         (x, stats) = lslq(Ao, b)  # warmup
         actual_lslq_bytes = @allocated lslq(Ao, b)
         if VERSION < v"1.11.5" || !Sys.isapple()
-          @test expected_lslq_bytes ≤ actual_lslq_bytes ≤ 1.025 * expected_lslq_bytes
+          @test expected_lslq_bytes ≤ actual_lslq_bytes ≤ 1.02 * expected_lslq_bytes
         end
 
         workspace = LslqWorkspace(Ao, b)
@@ -723,21 +723,21 @@
         # - mem (2p*p)-matrices: H
         # - lwork-vector: buffer
         function storage_block_gmres_bytes(mem, n, p)
-          res = (2*n*p + p*p + 2p*p + mem*p + mem*n*p + mem*p*p + mem*(mem+1)*p*p/2 + mem*2p*p)
+          res = (2*n*p + p*p + 2p*p + mem*p + mem*n*p + mem*p*p + (mem*(mem+1)÷2)*p*p + mem*2p*p)
           return nbits_FC * res
         end
 
-        expected_block_gmres_bytes = storage_block_gmres_bytes(mem, n, p)
-        block_gmres(A, B; memory=mem, itmax=mem)  # warmup
-        actual_block_gmres_bytes = @allocated block_gmres(A, B; memory=mem, itmax=mem)
-        if VERSION < v"1.11.5" || !Sys.isapple()
-          @test expected_block_gmres_bytes ≤ actual_block_gmres_bytes ≤ 1.08 * expected_block_gmres_bytes
-        end
-
         workspace = BlockGmresWorkspace(A, B; memory=mem)
         block_gmres!(workspace, A, B)  # warmup
         inplace_block_gmres_bytes = @allocated block_gmres!(workspace, A, B)
         @test inplace_block_gmres_bytes == 0
+
+        expected_block_gmres_bytes = storage_block_gmres_bytes(mem, n, p) + sizeof(workspace.buffer)
+        block_gmres(A, B; memory=mem, itmax=mem)  # warmup
+        actual_block_gmres_bytes = @allocated block_gmres(A, B; memory=mem, itmax=mem)
+        if VERSION < v"1.11.5" || !Sys.isapple()
+          @test expected_block_gmres_bytes ≤ actual_block_gmres_bytes ≤ 1.05 * expected_block_gmres_bytes
+        end
       end
 
       @testset "BLOCK-MINRES" begin
@@ -752,21 +752,21 @@
         # - mem (2p*p)-matrices: H
         # - lwork-vector: buffer
         function storage_block_minres_bytes(mem, n, p)
-          res = (2*n*p + p*p + 2p*p + mem*p + mem*n*p + mem*p*p + mem*(mem+1)*p*p/2 + mem*2p*p)
+          res = (2*n*p + p*p + 2p*p + mem*p + mem*n*p + mem*p*p + (mem*(mem+1)÷2)*p*p + mem*2p*p)
           return nbits_FC * res
         end
 
-        expected_block_minres_bytes = storage_block_minres_bytes(mem, n, p)
+        workspace = BlockMinresWorkspace(A, B)
+        block_minres!(workspace, A, B)  # warmup
+        inplace_block_minres_bytes = @allocated block_minres!(workspace, A, B)
+        @test inplace_block_minres_bytes == 0
+
+        expected_block_minres_bytes = storage_block_minres_bytes(mem, n, p) + sizeof(workspace.buffer)
         block_minres(A, B)  # warmup
         # actual_block_minres_bytes = @allocated block_minres(A, B)
         # if VERSION < v"1.11.5" || !Sys.isapple()
-        #   @test expected_block_minres_bytes ≤ actual_block_minres_bytes ≤ 1.08 * expected_block_minres_bytes
+        #   @test expected_block_minres_bytes ≤ actual_block_minres_bytes ≤ 1.05 * expected_block_minres_bytes
         # end
-
-        Workspace = BlockMinresWorkspace(A, B)
-        block_minres!(Workspace, A, B)  # warmup
-        inplace_block_minres_bytes = @allocated block_minres!(Workspace, A, B)
-        @test inplace_block_minres_bytes == 0
       end
     end
   end