@@ -1,7 +1,26 @@
using CUDA
using ForwardDiff
using GemmKernels
-using LinearAlgebra
+import Octavian, LinearAlgebra
+
+# for large, non-BLAS-compatible matrices, use Octavian.
+matmul!(C, A, B, alpha=true, beta=false) = LinearAlgebra.mul!(C, A, B, alpha, beta)
+function matmul!(C::Array,
+                 A::Union{Array, LinearAlgebra.Transpose{<:Any, <:Array},
+                          LinearAlgebra.Adjoint{<:Any, <:Array}},
+                 B::Union{Array, LinearAlgebra.Transpose{<:Any, <:Array},
+                          LinearAlgebra.Adjoint{<:Any, <:Array}},
+                 alpha::Bool=true, beta::Bool=false)
+    supported = eltype(C) <: LinearAlgebra.BlasFloat &&
+                eltype(A) <: LinearAlgebra.BlasFloat &&
+                eltype(B) <: LinearAlgebra.BlasFloat &&
+                eltype(C) == eltype(A) == eltype(B)
+    if !supported && (sizeof(C) > 2^20 || sizeof(A) > 2^20 || sizeof(B) > 2^20)
+        Octavian.matmul!(C, A, B, alpha, beta)
+    else
+        LinearAlgebra.mul!(C, A, B, alpha, beta)
+    end
+end

################################################################################

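For context (not part of the commit): a minimal usage sketch of the new matmul! helper defined above, assuming Octavian.jl is installed. Float16 is not a LinearAlgebra.BlasFloat, so once the buffers exceed 2^20 bytes the call is routed to Octavian.matmul!; BLAS-compatible element types keep going through LinearAlgebra.mul!.

    A = rand(Float16, 2048, 2048)   # Float16 has no BLAS gemm
    B = rand(Float16, 2048, 2048)
    C = zeros(Float16, 2048, 2048)  # 2048^2 * 2 bytes > 2^20, so the Octavian path is taken
    matmul!(C, A, B)                # alpha = true, beta = false by default

    D = rand(Float64, 64, 64)       # BlasFloat element type
    matmul!(zeros(64, 64), D, D)    # falls through to LinearAlgebra.mul!
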
@@ -63,7 +82,7 @@ using LinearAlgebra
new_a_h = transpose_a ? transpose(a_h) : a_h
new_b_h = transpose_b ? transpose(b_h) : b_h

-mul!(c_h, new_a_h, new_b_h, alpha, beta)
+matmul!(c_h, new_a_h, new_b_h, alpha, beta)
if A_type <: Integer
    @test c_h ≈ Array(d)
else
@@ -121,7 +140,7 @@ using LinearAlgebra
new_a_h = transpose_a ? transpose(a_h) : a_h
new_b_h = transpose_b ? transpose(b_h) : b_h

-mul!(c_h, new_a_h, new_b_h, alpha, beta)
+matmul!(c_h, new_a_h, new_b_h, alpha, beta)
@test c_h ≈ Array(d) rtol=sqrt(eps(A_type))
end
end
@@ -222,7 +241,7 @@ using LinearAlgebra
new_a_h = transpose_a ? transpose(a_h) : a_h
new_b_h = transpose_b ? transpose(b_h) : b_h

-mul!(c_h, new_a_h, new_b_h, alpha, beta)
+matmul!(c_h, new_a_h, new_b_h, alpha, beta)
@test c_h ≈ Array(d) rtol=sqrt(eps(AB_type))
end
end
@@ -274,7 +293,7 @@ using LinearAlgebra
new_a_h = transpose_a ? transpose(a_h) : a_h
new_b_h = transpose_b ? transpose(b_h) : b_h

-mul!(c_h, new_a_h, new_b_h, true, true)
+matmul!(c_h, new_a_h, new_b_h, true, true)
@test c_h .+ Array(bias) ≈ Array(d) rtol=sqrt(eps(Float16))
end
end
@@ -319,7 +338,7 @@ using LinearAlgebra
new_a_h = transpose_a ? transpose(a_h) : a_h
new_b_h = transpose_b ? transpose(b_h) : b_h

-mul!(c_h, Diagonal(new_a_h), new_b_h, true, true)
+matmul!(c_h, Diagonal(new_a_h), new_b_h, true, true)
@test c_h ≈ Array(d) rtol=sqrt(eps(Float16))
end
end
@@ -383,7 +402,7 @@ using LinearAlgebra
new_a_h = transpose_a ? transpose(new_a_h) : new_a_h
new_b_h = transpose_b ? transpose(new_b_h) : new_b_h

-mul!(c_h, new_a_h, new_b_h, true, true)
+matmul!(c_h, new_a_h, new_b_h, true, true)
@test c_h ≈ Array(d) rtol=sqrt(eps(Float16))
end
end
@@ -436,7 +455,7 @@ using LinearAlgebra
c_dual = reinterpret(ForwardDiff.Dual{Float32,Float32,1}, c_h)
d_dual = reinterpret(ForwardDiff.Dual{Float32,Float32,1}, Array(d))

-mul!(c_dual, a_dual, b_dual, true, true)
+matmul!(c_dual, a_dual, b_dual, true, true)
@test c_dual ≈ d_dual rtol=sqrt(eps(Float16))
end
end
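Side note (not from the commit): the dual-number test works because reinterpreting a Float32 matrix as ForwardDiff.Dual{Float32,Float32,1} packs each value/partial pair into one element, halving the first dimension; the reinterpreted arrays are not plain Arrays, so they take the generic matmul! method, i.e. LinearAlgebra.mul!. A small sketch:

    using ForwardDiff
    x = rand(Float32, 4, 4)
    xd = reinterpret(ForwardDiff.Dual{Float32,Float32,1}, x)  # 2x4 matrix of Dual numbers
    size(xd) == (2, 4)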