From d8379497c36114cab6ebface9ffed2ac1e0d4a48 Mon Sep 17 00:00:00 2001 From: Jutho Date: Sat, 2 Nov 2024 01:21:04 +0100 Subject: [PATCH 1/7] complete overhaul --- Project.toml | 9 +- src/LibTBLIS.jl | 109 ++++++++++++++ src/LibTblis.jl | 146 ------------------ src/TensorOperationsTBLIS.jl | 97 +++--------- src/strided.jl | 172 +++++++++++++++++++++ test/methods.jl | 244 ++++++++++++++++++++++++++++++ test/runtests.jl | 146 ++---------------- test/tensor.jl | 285 +++++++++++++++++++++++++++++++++++ 8 files changed, 850 insertions(+), 358 deletions(-) create mode 100644 src/LibTBLIS.jl delete mode 100644 src/LibTblis.jl create mode 100644 src/strided.jl create mode 100644 test/methods.jl create mode 100644 test/tensor.jl diff --git a/Project.toml b/Project.toml index 07b8e9d..70ed301 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TensorOperationsTBLIS" uuid = "1e289f0c-8058-4c3e-8acf-f8ef036bd865" -authors = ["lkdvos "] -version = "0.2.0" +authors = ["Lukas Devos ", "Jutho Haegeman "] +version = "0.3.0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -11,14 +11,17 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" tblis_jll = "9c7f617c-f299-5d18-afb6-044c7798b3d0" [compat] +Random = "1" TensorOperations = "5" TupleTools = "1" +Test = "1" julia = "1.8" tblis_jll = "1.2" [extras] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "LinearAlgebra"] +test = ["Test", "LinearAlgebra", "Random"] diff --git a/src/LibTBLIS.jl b/src/LibTBLIS.jl new file mode 100644 index 0000000..3954b80 --- /dev/null +++ b/src/LibTBLIS.jl @@ -0,0 +1,109 @@ +module LibTBLIS + +using tblis_jll +using LinearAlgebra: BlasFloat + +export tblis_scalar, tblis_tensor +export tblis_tensor_add, tblis_tensor_mult, tblis_tensor_dot +export tblis_set_num_threads, tblis_get_num_threads + +const ptrdiff_t = Cptrdiff_t + +const scomplex = ComplexF32 +const dcomplex = ComplexF64 + +const IS_LIBC_MUSL = occursin("musl", Base.BUILD_TRIPLET) +if Sys.isapple() && Sys.ARCH === :aarch64 + include("lib/aarch64-apple-darwin20.jl") +elseif Sys.islinux() && Sys.ARCH === :aarch64 && !IS_LIBC_MUSL + include("lib/aarch64-linux-gnu.jl") +elseif Sys.islinux() && Sys.ARCH === :aarch64 && IS_LIBC_MUSL + include("lib/aarch64-linux-musl.jl") +elseif Sys.islinux() && startswith(string(Sys.ARCH), "arm") && !IS_LIBC_MUSL + include("lib/armv7l-linux-gnueabihf.jl") +elseif Sys.islinux() && startswith(string(Sys.ARCH), "arm") && IS_LIBC_MUSL + include("lib/armv7l-linux-musleabihf.jl") +elseif Sys.islinux() && Sys.ARCH === :i686 && !IS_LIBC_MUSL + include("lib/i686-linux-gnu.jl") +elseif Sys.islinux() && Sys.ARCH === :i686 && IS_LIBC_MUSL + include("lib/i686-linux-musl.jl") +elseif Sys.iswindows() && Sys.ARCH === :i686 + include("lib/i686-w64-mingw32.jl") +elseif Sys.islinux() && Sys.ARCH === :powerpc64le + include("lib/powerpc64le-linux-gnu.jl") +elseif Sys.isapple() && Sys.ARCH === :x86_64 + include("lib/x86_64-apple-darwin14.jl") +elseif Sys.islinux() && Sys.ARCH === :x86_64 && !IS_LIBC_MUSL + include("lib/x86_64-linux-gnu.jl") +elseif Sys.islinux() && Sys.ARCH === :x86_64 && IS_LIBC_MUSL + include("lib/x86_64-linux-musl.jl") +elseif Sys.isbsd() && !Sys.isapple() + include("lib/x86_64-unknown-freebsd.jl") +elseif Sys.iswindows() && Sys.ARCH === :x86_64 + include("lib/x86_64-w64-mingw32.jl") +else + error("Unknown platform: $(Base.BUILD_TRIPLET)") +end + +# tblis_scalar and tblis_tensor +# 
-----------------------------
"""
    tblis_scalar(s::Number)

Initializes a tblis scalar from a number.
"""
function tblis_scalar end

"""
    tblis_tensor(A::AbstractArray{T<:BlasFloat}, [scalar::Number, szA::Vector{len_type}, strA::Vector{stride_type}])

Initializes a tblis tensor from an array, which should be strided and admit a pointer to
its data. This operation is deemed unsafe, in the sense that the user is responsible for
ensuring that the references to the array and to the size and stride vectors stay alive
during the lifetime of the tensor; see the usage sketch further below.
"""
function tblis_tensor end

for (T, tblis_init_scalar, tblis_init_tensor, tblis_init_tensor_scaled) in
    ((:Float32, :tblis_init_scalar_s, :tblis_init_tensor_s, :tblis_init_tensor_scaled_s),
     (:Float64, :tblis_init_scalar_d, :tblis_init_tensor_d, :tblis_init_tensor_scaled_d),
     (:ComplexF32, :tblis_init_scalar_c, :tblis_init_tensor_c, :tblis_init_tensor_scaled_c),
     (:ComplexF64, :tblis_init_scalar_z, :tblis_init_tensor_z, :tblis_init_tensor_scaled_z))
    @eval begin
        function tblis_scalar(s::$T)
            t = Ref{tblis_scalar}()
            $tblis_init_scalar(t, s)
            return t[]
        end
        function tblis_tensor(A::AbstractArray{$T,N},
                              s::Number=one($T),
                              szA::Vector{len_type}=collect(len_type, size(A)),
                              strA::Vector{stride_type}=collect(stride_type, strides(A))) where {N}
            t = Ref{tblis_tensor}()
            if isone(s)
                $tblis_init_tensor(t, N, pointer(szA), pointer(A), pointer(strA))
            else
                $tblis_init_tensor_scaled(t, $T(s), N, pointer(szA), pointer(A),
                                          pointer(strA))
            end
            return t[]
        end
    end
end

# tensor operations
# ------------------
function tblis_tensor_add(A::tblis_tensor, idxA, B::tblis_tensor, idxB)
    return tblis_tensor_add(C_NULL, C_NULL, Ref(A), idxA, Ref(B), idxB)
end

function tblis_tensor_mult(A::tblis_tensor, idxA, B::tblis_tensor, idxB, C::tblis_tensor,
                           idxC)
    return tblis_tensor_mult(C_NULL, C_NULL, Ref(A), idxA, Ref(B), idxB, Ref(C), idxC)
end

function tblis_tensor_dot(A::tblis_tensor, idxA, B::tblis_tensor, idxB, C::tblis_scalar)
    return tblis_tensor_dot(C_NULL, C_NULL, Ref(A), idxA, Ref(B), idxB, Ref(C))
end

end
diff --git a/src/LibTblis.jl b/src/LibTblis.jl
deleted file mode 100644
index 013ec79..0000000
--- a/src/LibTblis.jl
+++ /dev/null
@@ -1,146 +0,0 @@
-module LibTblis
-
-using tblis_jll
-using LinearAlgebra: BlasFloat
-
-export tblis_scalar, tblis_matrix, tblis_tensor
-export tblis_tensor_add, tblis_tensor_mult, tblis_tensor_dot
-export tblis_set_num_threads, tblis_get_num_threads
-
-const ptrdiff_t = Cptrdiff_t
-
-const scomplex = ComplexF32
-const dcomplex = ComplexF64
-
-const IS_LIBC_MUSL = occursin("musl", Base.BUILD_TRIPLET)
-if Sys.isapple() && Sys.ARCH === :aarch64
-    include("lib/aarch64-apple-darwin20.jl")
-elseif Sys.islinux() && Sys.ARCH === :aarch64 && !IS_LIBC_MUSL
-    include("lib/aarch64-linux-gnu.jl")
-elseif Sys.islinux() && Sys.ARCH === :aarch64 && IS_LIBC_MUSL
-    include("lib/aarch64-linux-musl.jl")
-elseif Sys.islinux() && startswith(string(Sys.ARCH), "arm") && !IS_LIBC_MUSL
-    include("lib/armv7l-linux-gnueabihf.jl")
-elseif Sys.islinux() && startswith(string(Sys.ARCH), "arm") && IS_LIBC_MUSL
-    include("lib/armv7l-linux-musleabihf.jl")
-elseif Sys.islinux() && Sys.ARCH === :i686 && !IS_LIBC_MUSL
-    include("lib/i686-linux-gnu.jl")
-elseif Sys.islinux() && Sys.ARCH === :i686 && IS_LIBC_MUSL
-    include("lib/i686-linux-musl.jl")
-elseif Sys.iswindows() && Sys.ARCH === :i686
-    include("lib/i686-w64-mingw32.jl")
-elseif Sys.islinux() && Sys.ARCH === :powerpc64le
-    include("lib/powerpc64le-linux-gnu.jl")
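
# Illustrative usage sketch for the new `tblis_tensor` constructor documented above (not
# part of the patch). It assumes `LibTBLIS` is the internal, unexported submodule and that
# `len_type`/`stride_type` are the integer types defined by the platform-specific includes.
# `GC.@preserve` is one way to satisfy the lifetime requirement from the docstring:
using TensorOperationsTBLIS
const L = TensorOperationsTBLIS.LibTBLIS

A = randn(Float64, 4, 4)
C = zeros(Float64, 4, 4)
szA = collect(L.len_type, size(A)); strA = collect(L.stride_type, strides(A))
szC = collect(L.len_type, size(C)); strC = collect(L.stride_type, strides(C))
GC.@preserve A C szA strA szC strC begin
    tA = L.tblis_tensor(A, 1.0, szA, strA)  # scale α = 1 travels with A
    tC = L.tblis_tensor(C, 0.0, szC, strC)  # scale β = 0 travels with C
    L.tblis_tensor_add(tA, "ab", tC, "ba")  # C ← β*C + α*permute(A): a transpose
end
C ≈ permutedims(A)  # true
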
-elseif Sys.isapple() && Sys.ARCH === :x86_64 - include("lib/x86_64-apple-darwin14.jl") -elseif Sys.islinux() && Sys.ARCH === :x86_64 && !IS_LIBC_MUSL - include("lib/x86_64-linux-gnu.jl") -elseif Sys.islinux() && Sys.ARCH === :x86_64 && IS_LIBC_MUSL - include("lib/x86_64-linux-musl.jl") -elseif Sys.isbsd() && !Sys.isapple() - include("lib/x86_64-unknown-freebsd.jl") -elseif Sys.iswindows() && Sys.ARCH === :x86_64 - include("lib/x86_64-w64-mingw32.jl") -else - error("Unknown platform: $(Base.BUILD_TRIPLET)") -end - -# tblis_scalar -# ------------ -""" - tblis_scalar(s::Number) - -Initializes a tblis scalar from a number. -""" -function tblis_scalar end -function tblis_scalar(s::Float32) - t = Ref{tblis_scalar}() - tblis_init_scalar_s(t, s) - return t[] -end -function tblis_scalar(s::Float64) - t = Ref{tblis_scalar}() - tblis_init_scalar_d(t, s) - return t[] -end -function tblis_scalar(s::ComplexF32) - t = Ref{tblis_scalar}() - tblis_init_scalar_c(t, s) - return t[] -end -function tblis_scalar(s::ComplexF64) - t = Ref{tblis_scalar}() - tblis_init_scalar_z(t, s) - return t[] -end - -# tblis_tensor -# ------------ -""" - tblis_tensor(A::StridedArray{T<:BlasFloat}, szA::Vector{Int}, strA::Vector{Int}, s=one(T)) - -Initializes a tblis tensor from a strided array. This operation is deemed unsafe, in the -sense that the user is responsible for ensuring that the reference to the array and the -sizes and strides stays alive during the lifetime of the tensor. -""" -function tblis_tensor end - -function tblis_tensor(A::StridedArray{Float32,N}, szA::Vector{Int}, strA::Vector{Int}, - s::Number=one(Float32)) where {N} - t = Ref{tblis_tensor}() - if isone(s) - tblis_init_tensor_s(t, N, pointer(szA), pointer(A), pointer(strA)) - else - tblis_init_tensor_scaled_s(t, Float32(s), N, pointer(szA), pointer(A), - pointer(strA)) - end - return t[] -end -function tblis_tensor(A::StridedArray{Float64,N}, szA::Vector{Int}, strA::Vector{Int}, - s::Number=one(Float64)) where {N} - t = Ref{tblis_tensor}() - if isone(s) - tblis_init_tensor_d(t, N, pointer(szA), pointer(A), pointer(strA)) - else - tblis_init_tensor_scaled_d(t, Float64(s), N, pointer(szA), pointer(A), - pointer(strA)) - end - return t[] -end -function tblis_tensor(A::StridedArray{ComplexF32,N}, szA::Vector{Int}, strA::Vector{Int}, - s::Number=one(ComplexF32)) where {N} - t = Ref{tblis_tensor}() - if isone(s) - tblis_init_tensor_c(t, N, pointer(szA), pointer(A), pointer(strA)) - else - tblis_init_tensor_scaled_c(t, ComplexF32(s), N, pointer(szA), pointer(A), - pointer(strA)) - end - return t[] -end -function tblis_tensor(A::StridedArray{ComplexF64,N}, szA::Vector{Int}, strA::Vector{Int}, - s::Number=one(ComplexF64)) where {N} - t = Ref{tblis_tensor}() - if isone(s) - tblis_init_tensor_z(t, N, pointer(szA), pointer(A), pointer(strA)) - else - tblis_init_tensor_scaled_z(t, ComplexF64(s), N, pointer(szA), pointer(A), - pointer(strA)) - end - return t[] -end - -function tblis_tensor_add(A::tblis_tensor, idxA, B::tblis_tensor, idxB) - return tblis_tensor_add(C_NULL, C_NULL, Ref(A), idxA, Ref(B), idxB) -end - -function tblis_tensor_mult(A::tblis_tensor, idxA, B::tblis_tensor, idxB, C::tblis_tensor, - idxC) - return tblis_tensor_mult(C_NULL, C_NULL, Ref(A), idxA, Ref(B), idxB, Ref(C), idxC) -end - -function tblis_tensor_dot(A::tblis_tensor, idxA, B::tblis_tensor, idxB, C::tblis_scalar) - return tblis_tensor_dot(C_NULL, C_NULL, Ref(A), idxA, Ref(B), idxB, Ref(C)) -end - -end diff --git a/src/TensorOperationsTBLIS.jl b/src/TensorOperationsTBLIS.jl index 
83192ed..2ddf149 100644 --- a/src/TensorOperationsTBLIS.jl +++ b/src/TensorOperationsTBLIS.jl @@ -1,93 +1,34 @@ module TensorOperationsTBLIS using TensorOperations +using TensorOperations: StridedView, DefaultAllocator, IndexError +using TensorOperations: istrivialpermutation, BlasFloat, linearize +using TensorOperations: argcheck_tensoradd, dimcheck_tensoradd, + argcheck_tensortrace, dimcheck_tensortrace, + argcheck_tensorcontract, dimcheck_tensorcontract using TensorOperations: Index2Tuple, IndexTuple, linearize, IndexError -using LinearAlgebra: BlasFloat, rmul! +using LinearAlgebra: BlasFloat using TupleTools -include("LibTblis.jl") -using .LibTblis +include("LibTBLIS.jl") +using .LibTBLIS +using .LibTBLIS: LibTBLIS, len_type, stride_type -export tblis_set_num_threads, tblis_get_num_threads -export tblisBackend +export TBLIS +export get_num_tblis_threads, set_num_tblis_threads + +get_num_tblis_threads() = convert(Int, LibTBLIS.tblis_get_num_threads()) +set_num_tblis_threads(n) = LibTBLIS.tblis_set_num_threads(convert(Cuint, n)) # TensorOperations #------------------ -struct tblisBackend <: TensorOperations.AbstractBackend end - -function TensorOperations.tensoradd!(C::StridedArray{T}, A::StridedArray{T}, - pA::Index2Tuple, conjA::Bool, - α::Number, β::Number, - ::tblisBackend) where {T<:BlasFloat} - TensorOperations.argcheck_tensoradd(C, A, pA) - TensorOperations.dimcheck_tensoradd(C, A, pA) - - szC = collect(size(C)) - strC = collect(strides(C)) - C_tblis = tblis_tensor(C, szC, strC, β) - - szA = collect(size(A)) - strA = collect(strides(A)) - A_tblis = tblis_tensor(conjA ? conj(A) : A, szA, strA, α) - - einA, einC = TensorOperations.add_labels(pA) - tblis_tensor_add(A_tblis, string(einA...), C_tblis, string(einC...)) - - return C -end - -function TensorOperations.tensorcontract!(C::StridedArray{T}, - A::StridedArray{T}, pA::Index2Tuple, - conjA::Bool, B::StridedArray{T}, - pB::Index2Tuple, conjB::Bool, pAB::Index2Tuple, - α::Number, β::Number, - ::tblisBackend) where {T<:BlasFloat} - TensorOperations.argcheck_tensorcontract(C, A, pA, B, pB, pAB) - TensorOperations.dimcheck_tensorcontract(C, A, pA, B, pB, pAB) - - rmul!(C, β) # TODO: is it possible to use tblis scaling here? - szC = ndims(C) == 0 ? Int[] : collect(size(C)) - strC = ndims(C) == 0 ? Int[] : collect(strides(C)) - C_tblis = tblis_tensor(C, szC, strC) - - szA = collect(size(A)) - strA = collect(strides(A)) - A_tblis = tblis_tensor(conjA ? conj(A) : A, szA, strA, α) - - szB = collect(size(B)) - strB = collect(strides(B)) - B_tblis = tblis_tensor(conjB ? conj(B) : B, szB, strB, 1) - - einA, einB, einC = TensorOperations.contract_labels(pA, pB, pAB) - tblis_tensor_mult(A_tblis, string(einA...), B_tblis, string(einB...), C_tblis, - string(einC...)) - - return C -end - -function TensorOperations.tensortrace!(C::StridedArray{T}, - A::StridedArray{T}, p::Index2Tuple, q::Index2Tuple, - conjA::Bool, - α::Number, β::Number, - ::tblisBackend) where {T<:BlasFloat} - TensorOperations.argcheck_tensortrace(C, A, p, q) - TensorOperations.dimcheck_tensortrace(C, A, p, q) - - rmul!(C, β) # TODO: is it possible to use tblis scaling here? - szC = ndims(C) == 0 ? Int[] : collect(size(C)) - strC = ndims(C) == 0 ? Int[] : collect(strides(C)) - C_tblis = tblis_tensor(C, szC, strC) - - szA = collect(size(A)) - strA = collect(strides(A)) - A_tblis = tblis_tensor(conjA ? 
conj(A) : A, szA, strA, α) - - einA, einC = TensorOperations.trace_labels(p, q) +struct TBLIS <: TensorOperations.AbstractBackend end - tblis_tensor_add(A_tblis, string(einA...), C_tblis, string(einC...)) +Base.@deprecate(tblisBackend(), TBLIS()) +Base.@deprecate(tblis_get_num_threads(), get_num_tblis_threads()) +Base.@deprecate(tblis_set_num_threads(n), set_num_tblis_threads(n)) - return C -end +include("strided.jl") end # module TensorOperationsTBLIS diff --git a/src/strided.jl b/src/strided.jl new file mode 100644 index 0000000..b700343 --- /dev/null +++ b/src/strided.jl @@ -0,0 +1,172 @@ +#------------------------------------------------------------------------------------------- +# Force strided implementation on AbstractArray instances with HPTTBLAS backend +#------------------------------------------------------------------------------------------- +const SV = StridedView +function TensorOperations.tensoradd!(C::AbstractArray, + A::AbstractArray, pA::Index2Tuple, conjA::Bool, + α::Number, β::Number, + backend::TBLIS, allocator=DefaultAllocator()) + # resolve conj flags and absorb into StridedView constructor to avoid type instabilities later on + if conjA + stridedtensoradd!(SV(C), conj(SV(A)), pA, α, β, backend, allocator) + else + stridedtensoradd!(SV(C), SV(A), pA, α, β, backend, allocator) + end + return C +end + +function TensorOperations.tensortrace!(C::AbstractArray, + A::AbstractArray, p::Index2Tuple, q::Index2Tuple, + conjA::Bool, + α::Number, β::Number, + backend::TBLIS, allocator=DefaultAllocator()) + # resolve conj flags and absorb into StridedView constructor to avoid type instabilities later on + if conjA + stridedtensortrace!(SV(C), conj(SV(A)), p, q, α, β, backend, allocator) + else + stridedtensortrace!(SV(C), SV(A), p, q, α, β, backend, allocator) + end + return C +end + +function TensorOperations.tensorcontract!(C::AbstractArray, + A::AbstractArray, pA::Index2Tuple, conjA::Bool, + B::AbstractArray, pB::Index2Tuple, conjB::Bool, + pAB::Index2Tuple, + α::Number, β::Number, + backend::TBLIS, allocator=DefaultAllocator()) + # resolve conj flags and absorb into StridedView constructor to avoid type instabilities later on + if conjA && conjB + stridedtensorcontract!(SV(C), conj(SV(A)), pA, conj(SV(B)), pB, pAB, α, β, + backend, allocator) + elseif conjA + stridedtensorcontract!(SV(C), conj(SV(A)), pA, SV(B), pB, pAB, α, β, + backend, allocator) + elseif conjB + stridedtensorcontract!(SV(C), SV(A), pA, conj(SV(B)), pB, pAB, α, β, + backend, allocator) + else + stridedtensorcontract!(SV(C), SV(A), pA, SV(B), pB, pAB, α, β, + backend, allocator) + end + return C +end + +#------------------------------------------------------------------------------------------- +# StridedView implementation +#------------------------------------------------------------------------------------------- +function tblis_tensor(A::StridedView, + s::Number=one(eltype(A)), + szA::Vector{len_type}=collect(len_type, size(A)), + strA::Vector{stride_type}=collect(stride_type, strides(A))) + t₁ = LibTBLIS.tblis_tensor(A, s, szA, strA) + if A.op == conj + t₂ = LibTBLIS.tblis_tensor(t₁.type, Cint(1), t₁.scalar, t₁.data, t₁.ndim, t₁.len, + t₁.stride) + return t₂ + else + return t₁ + end +end + +function stridedtensoradd!(C::StridedView{T}, + A::StridedView{T}, pA::Index2Tuple, + α::Number, β::Number, + backend::TBLIS, + allocator=DefaultAllocator()) where {T<:BlasFloat} + argcheck_tensoradd(C, A, pA) + dimcheck_tensoradd(C, A, pA) + if Base.mightalias(C, A) + throw(ArgumentError("output tensor must not be 
aliased with input tensor")) + end + + # directly use TBLIS types to avoid additional conversion step + C_tblis = tblis_tensor(C, β) + A_tblis = tblis_tensor(A, α) + einA, einC = TensorOperations.add_labels(pA) + tblis_tensor_add(A_tblis, string(einA...), C_tblis, string(einC...)) + return C +end + +function stridedtensortrace!(C::StridedView{T}, + A::StridedView{T}, p::Index2Tuple, q::Index2Tuple, + α::Number, β::Number, + backend::TBLIS, + allocator=DefaultAllocator()) where {T<:BlasFloat} + argcheck_tensortrace(C, A, p, q) + dimcheck_tensortrace(C, A, p, q) + + Base.mightalias(C, A) && + throw(ArgumentError("output tensor must not be aliased with input tensor")) + + # directly use TBLIS types to avoid additional conversion step + # isone(β) || rmul!(C, β) # TODO: check if TBLIS handles the scaling correctly + C_tblis = tblis_tensor(C, β) + A_tblis = tblis_tensor(A, α) + einA, einC = TensorOperations.trace_labels(p, q) + tblis_tensor_add(A_tblis, string(einA...), C_tblis, string(einC...)) + return C +end + +function stridedtensorcontract!(C::StridedView{T}, + A::StridedView{T}, pA::Index2Tuple, + B::StridedView{T}, pB::Index2Tuple, + pAB::Index2Tuple, + α::Number, β::Number, + backend::TBLIS, + allocator=DefaultAllocator()) where {T<:BlasFloat} + argcheck_tensorcontract(C, A, pA, B, pB, pAB) + dimcheck_tensorcontract(C, A, pA, B, pB, pAB) + + (Base.mightalias(C, A) || Base.mightalias(C, B)) && + throw(ArgumentError("output tensor must not be aliased with input tensor")) + + einA, einB, einC = TensorOperations.contract_labels(pA, pB, pAB) + + # tblis_tensor_mult ignores conjugation flags in A and B (and C) + if A.op == conj && B.op == conj + iszero(β) || conj!(C) + + C_tblis = tblis_tensor(C, conj(β)) + A_tblis = tblis_tensor(A, conj(α)) + B_tblis = tblis_tensor(B) + + tblis_tensor_mult(A_tblis, string(einA...), B_tblis, string(einB...), + C_tblis, string(einC...)) + conj!(C) + elseif A.op == conj + pA2 = TensorOperations.trivialpermutation(pA) + A2 = StridedView(TensorOperations.tensoralloc_add(eltype(A), A, pA2, false, + Val(true), + allocator)) + A2 = tensorcopy!(A2, A, pA2, false, α, backend, allocator) + + C_tblis = tblis_tensor(C, β) + A_tblis = tblis_tensor(A2) + B_tblis = tblis_tensor(B) + + tblis_tensor_mult(A_tblis, string(einA...), B_tblis, string(einB...), + C_tblis, string(einC...)) + elseif B.op == conj + pB2 = TensorOperations.trivialpermutation(pB) + B2 = StridedView(TensorOperations.tensoralloc_add(eltype(B), B, pB2, false, + Val(true), + allocator)) + B2 = tensorcopy!(B2, B, pB2, false, α, backend, allocator) + + C_tblis = tblis_tensor(C, β) + A_tblis = tblis_tensor(A) + B_tblis = tblis_tensor(B2) + + tblis_tensor_mult(A_tblis, string(einA...), B_tblis, string(einB...), + C_tblis, string(einC...)) + else + C_tblis = tblis_tensor(C, β) + A_tblis = tblis_tensor(A, α) + B_tblis = tblis_tensor(B) + + tblis_tensor_mult(A_tblis, string(einA...), B_tblis, string(einB...), + C_tblis, string(einC...)) + end + return C +end diff --git a/test/methods.jl b/test/methods.jl new file mode 100644 index 0000000..ede5b58 --- /dev/null +++ b/test/methods.jl @@ -0,0 +1,244 @@ +b = TBLIS() + +# test simple methods +#--------------------- +typelist = (Float32, Float64, ComplexF32, ComplexF64) +@testset "simple methods with eltype = $T" for T in typelist + @testset "tensorcopy" begin + A = randn(T, (3, 5, 4, 6)) + p = (3, 1, 4, 2) + C1 = permutedims(A, p) + C2 = @inferred tensorcopy((p...,), A, (1:4...,); backend=b) + C3 = @inferred tensorcopy(A, (p, ()), false, 1, b) + @test C1 ≈ C2 + @test 
C2 == C3 + @test C1 ≈ ncon(Any[A], Any[[-2, -4, -1, -3]]) + @test_throws IndexError tensorcopy(1:4, A, 1:3) + @test_throws IndexError tensorcopy(1:4, A, [1, 2, 2, 4]) + end + + @testset "tensoradd" begin + A = randn(T, (3, 5, 4, 6)) + B = randn(T, (5, 6, 3, 4)) + p = (3, 1, 4, 2) + C1 = A + permutedims(B, p) + C2 = @inferred tensoradd(A, p, B, (1:4...,); backend=b) + C3 = @inferred tensoradd(A, ((1:4...,), ()), false, B, (p, ()), false, 1, 1, b) + @test C1 ≈ C2 + @test C2 == C3 + @test C1 ≈ A + ncon(Any[B], Any[[-2, -4, -1, -3]]; backend=b) + @test_throws DimensionMismatch tensoradd(A, 1:4, B, 1:4) + end + + @testset "tensortrace" begin + A = randn(Float64, (50, 100, 100)) + C1 = zeros(50) + for i in 1:50 + for j in 1:100 + C1[i] += A[i, j, j] + end + end + C2 = tensortrace(A, [:a, :b, :b]; backend=b) + C3 = ncon(Any[A], Any[[-1, 1, 1]]; backend=b) + @test C1 ≈ C2 + @test C2 == C3 + A = randn(Float64, (3, 20, 5, 3, 20, 4, 5)) + C1 = zeros(4, 3, 3) + for i1 in 1:4, i2 in 1:3, i3 in 1:3 + for j1 in 1:20, j2 in 1:5 + C1[i1, i2, i3] += A[i2, j1, j2, i3, j1, i1, j2] + end + end + C2 = @inferred tensortrace((:e, :a, :d), A, (:a, :b, :c, :d, :b, :e, :c); backend=b) + C3 = @inferred tensortrace(A, ((6, 1, 4), ()), ((2, 3), (5, 7)), false, 1.0, b) + C4 = ncon(Any[A], Any[[-2, 1, 2, -3, 1, -1, 2]]; backend=b) + @test C1 ≈ C2 + @test C2 == C3 == C4 + @test_throws IndexError tensortrace(randn(2, 2, 2, 2, 2, 2, 2), ((1,), (3, 2)), + ((1, 5), (2, 6)), false) + end + + @testset "tensorcontract" begin + A = randn(T, (3, 20, 5, 3, 4)) + B = randn(T, (5, 6, 20, 3)) + C1 = zeros(T, (3, 3, 4, 3, 6)) + for a in 1:3, b in 1:20, c in 1:5, d in 1:3, e in 1:4, f in 1:6, g in 1:3 + C1[a, g, e, d, f] += A[a, b, c, d, e] * B[c, f, b, g] + end + C2 = @inferred tensorcontract((:a, :g, :e, :d, :f), + A, (:a, :b, :c, :d, :e), B, (:c, :f, :b, :g); + backend=b) + C3 = @inferred tensorcontract(A, ((1, 4, 5), (2, 3)), false, B, ((3, 1), (2, 4)), + false, ((1, 5, 3, 2, 4), ()), 1, b) + C4 = @inferred tensorcontract(A, ((1, 4, 5), (2, 3)), false, B, ((3, 1), (2, 4)), + false, ((1, 5, 3, 2, 4), ()), 1, b, + ManualAllocator()) + C5 = ncon(Any[A, B], Any[[-1, 1, 2, -4, -3], [2, -5, 1, -2]]; backend=b, + allocator=ManualAllocator()) + + @test C1 ≈ C2 + @test C2 == C3 == C4 == C5 + @test_throws IndexError tensorcontract(A, [:a, :b, :c, :d], B, [:c, :f, :b, :g]) + @test_throws IndexError tensorcontract(A, [:a, :b, :c, :a, :e], B, [:c, :f, :b, :g]) + end + + @testset "tensorproduct" begin + A = randn(T, (5, 5, 5, 5)) + B = rand(T, (5, 5, 5, 5)) + C1 = kron(reshape(B, (25, 25)), reshape(A, (25, 25))) + C2 = reshape((@inferred tensorproduct((1, 2, 5, 6, 3, 4, 7, 8), + A, (1, 2, 3, 4), B, (5, 6, 7, 8); backend=b)), + (5 * 5 * 5 * 5, 5 * 5 * 5 * 5)) + @test C1 ≈ C2 + @test_throws IndexError tensorproduct(A, [:a, :b, :c, :d], + B, [:d, :e, :f, :g]) + @test_throws IndexError tensorproduct([:a, :b, :c, :d, :e, :f, :g, :i], + A, [:a, :b, :c, :d], B, [:e, :f, :g, :h]) + + A = rand(1, 2) + B = rand(4, 5) + C1 = zeros(T, (2, 4, 1, 5)) + for i in axes(C1, 1), j in axes(C1, 2), k in axes(C1, 3), l in axes(C1, 4) + C1[i, j, k, l] = A[k, i] * B[j, l] + end + C2 = tensorcontract((-1, -2, -3, -4), A, (-3, -1), false, B, (-2, -4), false; + backend=b) + C3 = tensorproduct(A, ((1, 2), ()), false, B, ((), (1, 2)), false, + ((2, 3, 1, 4), ()), 1, b) + C4 = tensorproduct(A, ((1, 2), ()), false, B, ((), (1, 2)), false, + ((2, 3, 1, 4), ()), 1, b, ManualAllocator()) + @test C1 ≈ C2 + @test C2 == C3 == C4 + end +end + +# test in-place methods 
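
# A hedged reference sketch (not part of the patch) of the mutating-call convention the
# in-place tests below rely on, so the expected values are easy to verify by hand. It
# assumes only the documented TensorOperations.jl semantics C ← β*C + α*permute(op(A), p):
using TensorOperations, TensorOperationsTBLIS
A = randn(2, 3); C = randn(3, 2)
Cref = 0.5 .* C .+ 2.0 .* permutedims(A, (2, 1))
tensoradd!(C, A, ((2, 1), ()), false, 2.0, 0.5, TBLIS())
@assert C ≈ Cref  # the TBLIS backend matches the generic definition
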
+#----------------------- +# test different versions of in-place methods, +# with changing element type and with nontrivial strides +@testset "in-place methods" begin + @testset "tensorcopy!" begin + Abig = randn(Float64, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 * (0:9), 2 .+ 2 * (0:6), 5 .+ 3 * (0:6), 4 .+ 3 * (0:8)) + p = (3, 1, 4, 2) + Cbig = zeros(Float64, (50, 50, 50, 50)) + C = view(Cbig, 13 .+ (0:6), 11 .+ 2 * (0:9), 7 .+ 5 * (0:8), 4 .+ 5 * (0:6)) + Acopy = tensorcopy(A, 1:4) + Ccopy = tensorcopy(C, 1:4) + pA = (p, ()) + α = randn(Float64) + tensorcopy!(C, A, pA, false, α, b) + tensorcopy!(Ccopy, Acopy, pA, false, 1.0, b) + @test C ≈ α * Ccopy + @test_throws IndexError tensorcopy!(C, A, ((1, 2, 3), ()), false, 1.0, b) + @test_throws DimensionMismatch tensorcopy!(C, A, ((1, 2, 3, 4), ()), false, 1.0, b) + @test_throws IndexError tensorcopy!(C, A, ((1, 2, 2, 3), ()), false, 1.0, b) + end + + @testset "tensoradd!" begin + Abig = randn(ComplexF32, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 * (0:9), 2 .+ 2 * (0:6), 5 .+ 4 * (0:6), 4 .+ 3 * (0:8)) + p = (3, 1, 4, 2) + Cbig = zeros(ComplexF32, (50, 50, 50, 50)) + C = view(Cbig, 13 .+ (0:6), 11 .+ 4 * (0:9), 15 .+ 4 * (0:8), 4 .+ 3 * (0:6)) + Acopy = tensorcopy(p, A, 1:4) + Ccopy = tensorcopy(1:4, C, 1:4) + α = randn(ComplexF32) + β = randn(ComplexF32) + tensoradd!(C, A, (p, ()), false, α, β, b) + Ccopy = β * Ccopy + α * Acopy + @test C ≈ Ccopy + @test_throws IndexError tensoradd!(C, A, ((1, 2, 3), ()), false, 1.2, 0.5, b) + @test_throws DimensionMismatch tensoradd!(C, A, ((1, 2, 3, 4), ()), false, 1.2, 0.5, + b) + @test_throws IndexError tensoradd!(C, A, ((1, 1, 2, 3), ()), false, 1.2, 0.5, b) + end + + @testset "tensortrace!" begin + Abig = rand(ComplexF64, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 * (0:8), 2 .+ 2 * (0:14), 5 .+ 4 * (0:6), 7 .+ 2 * (0:8)) + Bbig = rand(ComplexF64, (50, 50)) + B = view(Bbig, 13 .+ (0:14), 3 .+ 5 * (0:6)) + Acopy = tensorcopy(A, 1:4) + Bcopy = tensorcopy(B, 1:2) + α = randn(Float64) + β = randn(Float64) + tensortrace!(B, A, ((2, 3), ()), ((1,), (4,)), true, α, β, b) + Bcopy = β * Bcopy + for i in 1 .+ (0:8) + Bcopy += α * conj(view(A, i, :, :, i)) + end + @test B ≈ Bcopy + @test_throws IndexError tensortrace!(B, A, ((1,), ()), ((2,), (3,)), false, α, β, b) + @test_throws DimensionMismatch tensortrace!(B, A, ((1, 4), ()), ((2,), (3,)), false, + α, β, b) + @test_throws IndexError tensortrace!(B, A, ((1, 4), ()), ((1, 1), (4,)), false, α, + β, b) + @test_throws IndexError tensortrace!(B, A, ((1, 4), ()), ((1,), (3,)), false, + α, β, b) + end + + @testset "tensorcontract! 
with allocator = $allocator" for allocator in + (DefaultAllocator(), + ManualAllocator()) + Abig = rand(ComplexF64, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 * (0:8), 2 .+ 2 * (0:14), 5 .+ 4 * (0:6), 7 .+ 2 * (0:8)) + Bbig = rand(ComplexF64, (50, 50, 50)) + B = view(Bbig, 3 .+ 5 * (0:6), 7 .+ 2 * (0:7), 13 .+ (0:14)) + Cbig = rand(ComplexF64, (40, 40, 40)) + C = view(Cbig, 3 .+ 2 * (0:8), 13 .+ (0:8), 7 .+ 3 * (0:7)) + Acopy = tensorcopy(A, 1:4) + Bcopy = tensorcopy(B, 1:3) + Ccopy = tensorcopy(C, 1:3) + α = randn(ComplexF64) + β = randn(ComplexF64) + Ccopy = β * Ccopy + for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) + for b in 1 .+ (0:14), c in 1 .+ (0:6) + Ccopy[d, a, e] += α * A[a, b, c, d] * conj(B[c, e, b]) + end + end + tensorcontract!(C, A, ((4, 1), (2, 3)), false, B, ((3, 1), (2,)), true, + ((1, 2, 3), ()), α, β, b, allocator) + @test C ≈ Ccopy + + Ccopy = tensorcopy(C, 1:3) + Ccopy = β * Ccopy + for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) + for b in 1 .+ (0:14), c in 1 .+ (0:6) + Ccopy[d, a, e] += α * conj(A[a, b, c, d]) * conj(B[c, e, b]) + end + end + tensorcontract!(C, A, ((4, 1), (2, 3)), true, B, ((3, 1), (2,)), true, + ((1, 2, 3), ()), α, β, b, allocator) + @test C ≈ Ccopy + + Ccopy = tensorcopy(C, 1:3) + Ccopy = β * Ccopy + for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) + for b in 1 .+ (0:14), c in 1 .+ (0:6) + Ccopy[d, a, e] += α * conj(A[a, b, c, d]) * B[c, e, b] + end + end + tensorcontract!(C, A, ((4, 1), (2, 3)), true, B, ((3, 1), (2,)), false, + ((1, 2, 3), ()), α, β, b, allocator) + @test C ≈ Ccopy + + @test_throws IndexError tensorcontract!(C, + A, ((4, 1), (2, 4)), false, + B, ((1, 3), (2,)), false, + ((1, 2, 3), ()), α, β, b) + @test_throws IndexError tensorcontract!(C, + A, ((4, 1), (2, 3)), false, + B, ((1, 3), ()), false, + ((1, 2, 3), ()), α, β, b) + @test_throws IndexError tensorcontract!(C, + A, ((4, 1), (2, 3)), false, + B, ((1, 3), (2,)), false, + ((1, 2), ()), α, β, b) + @test_throws DimensionMismatch tensorcontract!(C, + A, ((4, 1), (2, 3)), false, + B, ((1, 3), (2,)), false, + ((1, 3, 2), ()), α, β, b) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 4efbc95..8e76130 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,136 +1,20 @@ -using TensorOperations -using TensorOperationsTBLIS -using Test -using LinearAlgebra: norm - -const tblisbackend = tblisBackend() -@testset "elementary operations" verbose = true begin - @testset "tensorcopy" begin - A = randn(Float32, (3, 5, 4, 6)) - @tensor C1[4, 1, 3, 2] := A[1, 2, 3, 4] - @tensor backend = tblisbackend C2[4, 1, 3, 2] := A[1, 2, 3, 4] - @test C2 ≈ C1 - end - - @testset "tensoradd" begin - A = randn(Float32, (5, 6, 3, 4)) - B = randn(Float32, (5, 6, 3, 4)) - α = randn(Float32) - @tensor C1[a, b, c, d] := A[a, b, c, d] + α * B[a, b, c, d] - @tensor backend = tblisbackend C2[a, b, c, d] := A[a, b, c, d] + α * B[a, b, c, d] - @test collect(C2) ≈ C1 - - C = randn(ComplexF32, (5, 6, 3, 4)) - D = randn(ComplexF32, (5, 3, 4, 6)) - β = randn(ComplexF32) - @tensor E1[a, b, c, d] := C[a, b, c, d] + β * conj(D[a, c, d, b]) - @tensor backend = tblisbackend E2[a, b, c, d] := C[a, b, c, d] + - β * conj(D[a, c, d, b]) - @test collect(E2) ≈ E1 - end - - @testset "tensortrace" begin - A = randn(Float32, (5, 10, 10)) - @tensor B1[a] := A[a, b′, b′] - @tensor backend = tblisbackend B2[a] := A[a, b′, b′] - @test B2 ≈ B1 - - C = randn(ComplexF32, (3, 20, 5, 3, 20, 4, 5)) - @tensor D1[e, a, d] := C[a, b, c, d, b, e, c] - @tensor backend = tblisbackend D2[e, a, d] := C[a, b, c, d, 
b, e, c] - @test D2 ≈ D1 - - @tensor D3[a, e, d] := conj(C[a, b, c, d, b, e, c]) - @tensor backend = tblisbackend D4[a, e, d] := conj(C[a, b, c, d, b, e, c]) - @test D4 ≈ D3 - - α = randn(ComplexF32) - @tensor D5[d, e, a] := α * C[a, b, c, d, b, e, c] - @tensor backend = tblisbackend D6[d, e, a] := α * C[a, b, c, d, b, e, c] - @test D6 ≈ D5 - end - - @testset "tensorcontract" begin - A = randn(Float32, (3, 20, 5, 3, 4)) - B = randn(Float32, (5, 6, 20, 3)) - @tensor C1[a, g, e, d, f] := A[a, b, c, d, e] * B[c, f, b, g] - @tensor backend = tblisbackend C2[a, g, e, d, f] := A[a, b, c, d, e] * B[c, f, b, g] - @test C2 ≈ C1 - - D = randn(ComplexF64, (3, 3, 3)) - E = rand(ComplexF64, (3, 3, 3)) - @tensor F1[a, b, c, d, e, f] := D[a, b, c] * conj(E[d, e, f]) - @tensor backend = tblisbackend F2[a, b, c, d, e, f] := D[a, b, c] * conj(E[d, e, f]) - @test F2 ≈ F1 atol = 1e-12 +using TensorOperations, TensorOperationsTBLIS +using TensorOperations: IndexError +using TensorOperations: DefaultAllocator, ManualAllocator +using Test, Random, LinearAlgebra +Random.seed!(1234567) + +@testset "TensorOperationsTBLIS.jl" begin + set_num_tblis_threads(1) + @testset "method syntax" verbose = true begin + include("methods.jl") end -end - -@testset "more complicated expressions" verbose = true begin - Da, Db, Dc, Dd, De, Df, Dg, Dh = 10, 15, 4, 8, 6, 7, 3, 2 - A = rand(ComplexF64, (Dc, Da, Df, Da, De, Db, Db, Dg)) - B = rand(ComplexF64, (Dc, Dh, Dg, De, Dd)) - C = rand(ComplexF64, (Dd, Dh, Df)) - α = rand(ComplexF64) - # α = 1 - - @tensor D1[d, f, h] := A[c, a, f, a, e, b, b, g] * B[c, h, g, e, d] + α * C[d, h, f] - @tensor backend = tblisbackend D2[d, f, h] := A[c, a, f, a, e, b, b, g] * - B[c, h, g, e, d] + - α * C[d, h, f] - @test D2 ≈ D1 rtol = 1e-8 - @test norm(vec(D1)) ≈ sqrt(abs(@tensor D1[d, f, h] * conj(D1[d, f, h]))) - @test norm(D2) ≈ - sqrt(abs(@tensor backend = tblisbackend D2[d, f, h] * conj(D2[d, f, h]))) - - @testset "readme example" begin - α = randn() - A = randn(5, 5, 5, 5, 5, 5) - B = randn(5, 5, 5) - C = randn(5, 5, 5) - D = zeros(5, 5, 5) - D2 = copy(D) - @tensor begin - D[a, b, c] = A[a, e, f, c, f, g] * B[g, b, e] + α * C[c, a, b] - E[a, b, c] := A[a, e, f, c, f, g] * B[g, b, e] + α * C[c, a, b] - end - @tensor backend = tblisbackend begin - D2[a, b, c] = A[a, e, f, c, f, g] * B[g, b, e] + α * C[c, a, b] - E2[a, b, c] := A[a, e, f, c, f, g] * B[g, b, e] + α * C[c, a, b] - end - @test D2 ≈ D - @test E2 ≈ E + @test get_num_tblis_threads() == 1 + set_num_tblis_threads(2) + @testset "macro with index notation" verbose = true begin + include("tensor.jl") end - @testset "tensor network examples ($T)" for T in - (Float32, Float64, ComplexF32, ComplexF64) - D1, D2, D3 = 30, 40, 20 - d1, d2 = 2, 3 - - A1 = randn(T, D1, d1, D2) - A2 = randn(T, D2, d2, D3) - ρₗ = randn(T, D1, D1) - ρᵣ = randn(T, D3, D3) - H = randn(T, d1, d2, d1, d2) - - @tensor begin - HrA12[a, s1, s2, c] := ρₗ[a, a'] * A1[a', t1, b] * A2[b, t2, c'] * ρᵣ[c', c] * - H[s1, s2, t1, t2] - end - @tensor backend = tblisbackend begin - HrA12′[a, s1, s2, c] := ρₗ[a, a'] * A1[a', t1, b] * A2[b, t2, c'] * ρᵣ[c', c] * - H[s1, s2, t1, t2] - end - @test HrA12′ ≈ HrA12 - - @tensor begin - E1 = ρₗ[a', a] * A1[a, s, b] * A2[b, s', c] * ρᵣ[c, c'] * H[t, t', s, s'] * - conj(A1[a', t, b']) * conj(A2[b', t', c']) - end - @tensor backend = tblisbackend begin - E2 = ρₗ[a', a] * A1[a, s, b] * A2[b, s', c] * ρᵣ[c, c'] * H[t, t', s, s'] * - conj(A1[a', t, b']) * conj(A2[b', t', c']) - end - @test E2 ≈ E1 - end + @test get_num_tblis_threads() == 2 end diff 
--git a/test/tensor.jl b/test/tensor.jl new file mode 100644 index 0000000..a9255f1 --- /dev/null +++ b/test/tensor.jl @@ -0,0 +1,285 @@ +# test index notation using @tensor macro +#----------------------------------------- +b = TBLIS() + +@testset "tensorcontract 1" begin + A = randn(Float64, (3, 5, 4, 6)) + p = (4, 1, 3, 2) + C1 = permutedims(A, p) + @tensor backend = b C2[4, 1, 3, 2] := A[1, 2, 3, 4] + @test C1 ≈ C2 + @test_throws IndexError begin + @tensor C[1, 2, 3, 4] := A[1, 2, 3] + end + @test_throws IndexError begin + @tensor C[1, 2, 3, 4] := A[1, 2, 2, 4] + end + + B = randn(Float64, (5, 6, 3, 4)) + p = [3, 1, 4, 2] + @tensor backend = b C1[3, 1, 4, 2] := A[3, 1, 4, 2] + B[1, 2, 3, 4] + C2 = A + permutedims(B, p) + @test C1 ≈ C2 + @test_throws DimensionMismatch begin + @tensor backend = b C[1, 2, 3, 4] := A[1, 2, 3, 4] + B[1, 2, 3, 4] + end + + A = randn(Float64, (50, 100, 100)) + @tensor backend = b C1[a] := A[a, b', b'] + C2 = zeros(50) + for i in 1:50 + for j in 1:100 + C2[i] += A[i, j, j] + end + end + @test C1 ≈ C2 + A = randn(Float64, (3, 20, 5, 3, 20, 4, 5)) + @tensor backend = b C1[e, a, d] := A[a, b, c, d, b, e, c] + C2 = zeros(4, 3, 3) + for i1 in 1:4, i2 in 1:3, i3 in 1:3 + for j1 in 1:20, j2 in 1:5 + C2[i1, i2, i3] += A[i2, j1, j2, i3, j1, i1, j2] + end + end + @test C1 ≈ C2 + + A = randn(Float64, (3, 20, 5, 3, 4)) + B = randn(Float64, (5, 6, 20, 3)) + @tensor backend = b C1[a, g, e, d, f] := A[a, b, c, d, e] * B[c, f, b, g] + C2 = zeros(3, 3, 4, 3, 6) + for a in 1:3, b in 1:20, c in 1:5, d in 1:3, e in 1:4, f in 1:6, g in 1:3 + C2[a, g, e, d, f] += A[a, b, c, d, e] * B[c, f, b, g] + end + @test C1 ≈ C2 + @test_throws IndexError begin + @tensor backend = b A[a, b, c, d] * B[c, f, b, g] + end +end + +@testset "tensorcontract 2" begin + A = randn(ComplexF32, (5, 5, 5, 5)) + B = rand(ComplexF32, (5, 5, 5, 5)) + @tensor backend = b C1[1, 2, 5, 6, 3, 4, 7, 8] := A[1, 2, 3, 4] * B[5, 6, 7, 8] + C2 = reshape(kron(reshape(B, (25, 25)), reshape(A, (25, 25))), + (5, 5, 5, 5, 5, 5, 5, 5)) + @test C1 ≈ C2 + @test_throws IndexError begin + @tensor backend = b C[a, b, c, d, e, f, g, i] := A[a, b, c, d] * B[e, f, g, h] + end +end + +@testset "tensorcontract 3" begin + Da, Db, Dc, Dd, De, Df, Dg, Dh = 10, 15, 4, 8, 6, 7, 3, 2 + A = rand(ComplexF64, (Da, Dc, Df, Da, De, Db, Db, Dg)) + B = rand(ComplexF64, (Dc, Dh, Dg, De, Dd)) + C = rand(ComplexF64, (Dd, Dh, Df)) + @tensor backend = b D1[d, f, h] := A[a, c, f, a, e, b, b, g] * B[c, h, g, e, d] + + 0.5 * C[d, h, f] + D2 = zeros(ComplexF64, (Dd, Df, Dh)) + for d in 1:Dd, f in 1:Df, h in 1:Dh + D2[d, f, h] += 0.5 * C[d, h, f] + for a in 1:Da, b in 1:Db, c in 1:Dc, e in 1:De, g in 1:Dg + D2[d, f, h] += A[a, c, f, a, e, b, b, g] * B[c, h, g, e, d] + end + end + @test D1 ≈ D2 + @test norm(vec(D1)) ≈ + sqrt(abs((@tensor backend = b tensorscalar(D1[d, f, h] * conj(D1[d, f, h]))))) +end + +@testset "views" begin + p = [3, 1, 4, 2] + Abig = randn(Float32, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 .* (0:9), 2 .+ 2 .* (0:6), 5 .+ 4 .* (0:6), 4 .+ 3 .* (0:8)) + Cbig = zeros(Float32, (50, 50, 50, 50)) + C = view(Cbig, 13 .+ (0:6), 11 .+ 4 .* (0:9), 15 .+ 4 .* (0:8), 4 .+ 3 .* (0:6)) + Acopy = copy(A) + Ccopy = copy(C) + @tensor backend = b C[3, 1, 4, 2] = A[1, 2, 3, 4] + @tensor backend = b Ccopy[3, 1, 4, 2] = Acopy[1, 2, 3, 4] + @test C ≈ Ccopy + @test_throws IndexError begin + @tensor C[3, 1, 4, 2] = A[1, 2, 3] + end + @test_throws DimensionMismatch begin + @tensor C[3, 1, 4, 2] = A[3, 1, 4, 2] + end + @test_throws IndexError begin + @tensor C[1, 
1, 2, 3] = A[1, 2, 3, 4] + end +end + +@testset "views 2" begin + p = [3, 1, 4, 2] + Abig = randn(ComplexF64, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 .* (0:9), 2 .+ 2 .* (0:6), 5 .+ 4 .* (0:6), 4 .+ 3 .* (0:8)) + Cbig = zeros(ComplexF64, (50, 50, 50, 50)) + C = view(Cbig, 13 .+ (0:6), 11 .+ 4 .* (0:9), 15 .+ 4 .* (0:8), 4 .+ 3 .* (0:6)) + Acopy = permutedims(copy(A), p) + Ccopy = copy(C) + α = randn(Float64) + β = randn(Float64) + @tensor backend = b D[3, 1, 4, 2] := β * C[3, 1, 4, 2] + α * A[1, 2, 3, 4] + Dcopy = β * Ccopy + α * Acopy + @test D ≈ Dcopy + @test_throws IndexError begin + @tensor D[3, 1, 4, 2] = 0.5 * C[3, 1, 4, 2] + 1.2 * A[1, 2, 3] + end + @test_throws DimensionMismatch begin + @tensor D[3, 1, 4, 2] = 0.5 * C[3, 1, 4, 2] + 1.2 * A[3, 1, 2, 4] + end + @test_throws IndexError begin + @tensor D[1, 1, 2, 3] = 0.5 * C[1, 1, 2, 3] + 1.2 * A[1, 2, 3, 4] + end +end + +@testset "views 3" begin + Abig = rand(Float64, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 .* (0:8), 2 .+ 2 .* (0:14), 5 .+ 4 .* (0:6), 7 .+ 2 .* (0:8)) + Bbig = rand(Float64, (50, 50)) + B = view(Bbig, 13 .+ (0:14), 3 .+ 5 .* (0:6)) + Acopy = copy(A) + Bcopy = copy(B) + α = randn(Float64) + @tensor backend = b B[b, c] += α * A[a, b, c, a] + for i in 1 .+ (0:8) + Bcopy += α * view(A, i, :, :, i) + end + @test B ≈ Bcopy + @test_throws IndexError begin + @tensor B[b, c] += α * A[a, b, c] + end + @test_throws DimensionMismatch begin + @tensor B[c, b] += α * A[a, b, c, a] + end + @test_throws IndexError begin + @tensor B[c, b] += α * A[a, b, a, a] + end + @test_throws DimensionMismatch begin + @tensor B[c, b] += α * A[a, b, a, c] + end +end + +@testset "views 4" begin + Abig = rand(ComplexF64, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 .* (0:8), 2 .+ 2 .* (0:14), 5 .+ 4 .* (0:6), 7 .+ 2 .* (0:8)) + Bbig = rand(ComplexF64, (50, 50, 50)) + B = view(Bbig, 3 .+ 5 .* (0:6), 7 .+ 2 .* (0:7), 13 .+ (0:14)) + Cbig = rand(ComplexF64, (40, 40, 40)) + C = view(Cbig, 3 .+ 2 .* (0:8), 13 .+ (0:8), 7 .+ 3 .* (0:7)) + Acopy = copy(A) + Bcopy = copy(B) + Ccopy = copy(C) + α = randn(Float64) + for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) + for b in 1 .+ (0:14), c in 1 .+ (0:6) + Ccopy[d, a, e] -= α * A[a, b, c, d] * conj(B[c, e, b]) + end + end + @tensor backend = b C[d, a, e] -= α * A[a, b, c, d] * conj(B[c, e, b]) + @test C ≈ Ccopy +end + +@testset "Float32 views" begin + α = randn(Float64) + Abig = rand(ComplexF32, (30, 30, 30, 30)) + A = view(Abig, 1 .+ 3 .* (0:8), 2 .+ 2 .* (0:14), 5 .+ 4 .* (0:6), 7 .+ 2 .* (0:8)) + Bbig = rand(ComplexF32, (50, 50, 50)) + B = view(Bbig, 3 .+ 5 .* (0:6), 7 .+ 2 .* (0:7), 13 .+ (0:14)) + Cbig = rand(ComplexF32, (40, 40, 40)) + C = view(Cbig, 3 .+ 2 .* (0:8), 13 .+ (0:8), 7 .+ 3 .* (0:7)) + Ccopy = copy(C) + for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) + for b in 1 .+ (0:14), c in 1 .+ (0:6) + Ccopy[d, a, e] += α * A[a, b, c, d] * conj(B[c, e, b]) + end + end + @tensor backend = b C[d, a, e] += α * A[a, b, c, d] * conj(B[c, e, b]) + @test C ≈ Ccopy + @test_throws IndexError begin + @tensor C[d, a, e] += α * A[a, b, c, a] * B[c, e, b] + end + @test_throws IndexError begin + @tensor C[d, a, e] += α * A[a, b, c, d] * B[c, b] + end + @test_throws IndexError begin + @tensor C[d, e] += α * A[a, b, c, d] * B[c, e, b] + end + @test_throws DimensionMismatch begin + @tensor C[d, e, a] += α * A[a, b, c, d] * B[c, e, b] + end +end + +# Simple function example +# @tensor function f(A, b) +# w[x] := (1 // 2) * A[x, y] * b[y] +# return w +# end +# for T in (Float32, Float64, ComplexF32, ComplexF64, 
BigFloat) +# A = rand(T, 10, 10) +# b = rand(T, 10) +# @test f(A, b) ≈ (1 // 2) * A * b +# end + +# Example from README.md +@testset "README example" begin + α = randn() + A = randn(5, 5, 5, 5, 5, 5) + B = randn(5, 5, 5) + C = randn(5, 5, 5) + D = zeros(5, 5, 5) + @tensor begin + D[a, b, c] = A[a, e, f, c, f, g] * B[g, b, e] + α * C[c, a, b] + E[a, b, c] := A[a, e, f, c, f, g] * B[g, b, e] + α * C[c, a, b] + end + @test D == E +end + +# Some tensor network examples +scalartypelist = (Float32, Float64, ComplexF32, ComplexF64) +@testset "tensor network examples ($T)" for T in scalartypelist + D1, D2, D3 = 30, 40, 20 + d1, d2 = 2, 3 + A1 = rand(T, D1, d1, D2) .- 1 // 2 + A2 = rand(T, D2, d2, D3) .- 1 // 2 + rhoL = rand(T, D1, D1) .- 1 // 2 + rhoR = rand(T, D3, D3) .- 1 // 2 + H = rand(T, d1, d2, d1, d2) .- 1 // 2 + A12 = reshape(reshape(A1, D1 * d1, D2) * reshape(A2, D2, d2 * D3), (D1, d1, d2, D3)) + rA12 = reshape(reshape(rhoL * reshape(A12, (D1, d1 * d2 * D3)), + (D1 * d1 * d2, D3)) * rhoR, (D1, d1, d2, D3)) + HrA12 = permutedims(reshape(reshape(H, (d1 * d2, d1 * d2)) * + reshape(permutedims(rA12, (2, 3, 1, 4)), + (d1 * d2, D1 * D3)), (d1, d2, D1, D3)), + (3, 1, 2, 4)) + E = dot(A12, HrA12) + @tensor backend = b HrA12′[a, s1, s2, c] := rhoL[a, a'] * A1[a', t1, b] * + A2[b, t2, c'] * + rhoR[c', c] * H[s1, s2, t1, t2] + @tensor backend = b HrA12′′[:] := rhoL[-1, 1] * H[-2, -3, 4, 5] * A2[2, 5, 3] * + rhoR[3, -4] * + A1[1, 4, 2] # should be contracted in exactly same order + @tensor backend = b order = (a', b, c', t1, t2) begin + HrA12′′′[a, s1, s2, c] := rhoL[a, a'] * H[s1, s2, t1, t2] * A2[b, t2, c'] * + rhoR[c', c] * A1[a', t1, b] # should be contracted in exactly same order + end + @tensoropt HrA12′′′′[:] := rhoL[-1, 1] * H[-2, -3, 4, 5] * A2[2, 5, 3] * + rhoR[3, -4] * A1[1, 4, 2] + + @test HrA12′ == HrA12′′ == HrA12′′′ # should be exactly equal + @test HrA12 ≈ HrA12′ + @test HrA12 ≈ HrA12′′′′ + @test HrA12′′ ≈ ncon([rhoL, H, A2, rhoR, A1], + [[-1, 1], [-2, -3, 4, 5], [2, 5, 3], [3, -4], [1, 4, 2]]) + @test HrA12′′ == @ncon([rhoL, H, A2, rhoR, A1], + [[-1, 1], [-2, -3, 4, 5], [2, 5, 3], [3, -4], [1, 4, 2]]; + order=[1, 2, 3, 4, 5], output=[-1, -2, -3, -4], backend=b) + @test E ≈ + @tensor tensorscalar(rhoL[a', a] * A1[a, s, b] * A2[b, s', c] * rhoR[c, c'] * + H[t, t', s, s'] * conj(A1[a', t, b']) * + conj(A2[b', t', c'])) + @test E ≈ @ncon([rhoL, A1, A2, rhoR, H, conj(A1), conj(A2)], + [[5, 1], [1, 2, 3], [3, 4, 9], [9, 10], [6, 8, 2, 4], [5, 6, 7], + [7, 8, 10]]) + # this implicitly tests that `ncon` returns a scalar if no open indices +end From 118e5d33f52c8c7560cb3c6558b14933717dfc85 Mon Sep 17 00:00:00 2001 From: Jutho Date: Sat, 2 Nov 2024 22:52:23 +0100 Subject: [PATCH 2/7] improve tests and fix other comments --- src/TensorOperationsTBLIS.jl | 1 + src/strided.jl | 41 ++++++----- test/methods.jl | 70 ++++-------------- test/tensor.jl | 134 ++++++----------------------------- 4 files changed, 58 insertions(+), 188 deletions(-) diff --git a/src/TensorOperationsTBLIS.jl b/src/TensorOperationsTBLIS.jl index 2ddf149..a07063b 100644 --- a/src/TensorOperationsTBLIS.jl +++ b/src/TensorOperationsTBLIS.jl @@ -6,6 +6,7 @@ using TensorOperations: istrivialpermutation, BlasFloat, linearize using TensorOperations: argcheck_tensoradd, dimcheck_tensoradd, argcheck_tensortrace, dimcheck_tensortrace, argcheck_tensorcontract, dimcheck_tensorcontract +using TensorOperations: stridedtensoradd!, stridedtensortrace!, stridedtensorcontract! 
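
# Why this import matters (illustrative sketch, hypothetical names, not from this patch):
# patch 2 qualifies the definitions as `TensorOperations.stridedtensoradd!` etc., so they
# extend the library's existing functions instead of shadowing them with module-local ones.
# Only extended methods participate in the original function's dispatch:
module Lib
f(x) = "fallback"
end
module ExtLocal             # defines a *new* `f`; `Lib.f` is unaffected
f(x::Int) = "local"
end
module ExtMethod            # adds a method to the *existing* `Lib.f`
import ..Lib
Lib.f(x::Int) = "extended"
end
Lib.f(1)  # "extended": ExtMethod's method is reachable from Lib.f, ExtLocal's is not
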
using TensorOperations: Index2Tuple, IndexTuple, linearize, IndexError using LinearAlgebra: BlasFloat using TupleTools diff --git a/src/strided.jl b/src/strided.jl index b700343..66f79b8 100644 --- a/src/strided.jl +++ b/src/strided.jl @@ -1,5 +1,5 @@ #------------------------------------------------------------------------------------------- -# Force strided implementation on AbstractArray instances with HPTTBLAS backend +# Force strided implementation on AbstractArray instances with TBLIS backend #------------------------------------------------------------------------------------------- const SV = StridedView function TensorOperations.tensoradd!(C::AbstractArray, @@ -69,18 +69,17 @@ function tblis_tensor(A::StridedView, end end -function stridedtensoradd!(C::StridedView{T}, - A::StridedView{T}, pA::Index2Tuple, - α::Number, β::Number, - backend::TBLIS, - allocator=DefaultAllocator()) where {T<:BlasFloat} +function TensorOperations.stridedtensoradd!(C::StridedView{T}, + A::StridedView{T}, pA::Index2Tuple, + α::Number, β::Number, + backend::TBLIS, + allocator=DefaultAllocator()) where {T<:BlasFloat} argcheck_tensoradd(C, A, pA) dimcheck_tensoradd(C, A, pA) if Base.mightalias(C, A) throw(ArgumentError("output tensor must not be aliased with input tensor")) end - # directly use TBLIS types to avoid additional conversion step C_tblis = tblis_tensor(C, β) A_tblis = tblis_tensor(A, α) einA, einC = TensorOperations.add_labels(pA) @@ -88,19 +87,19 @@ function stridedtensoradd!(C::StridedView{T}, return C end -function stridedtensortrace!(C::StridedView{T}, - A::StridedView{T}, p::Index2Tuple, q::Index2Tuple, - α::Number, β::Number, - backend::TBLIS, - allocator=DefaultAllocator()) where {T<:BlasFloat} +function TensorOperations.stridedtensortrace!(C::StridedView{T}, + A::StridedView{T}, + p::Index2Tuple, + q::Index2Tuple, + α::Number, β::Number, + backend::TBLIS, + allocator=DefaultAllocator()) where {T<:BlasFloat} argcheck_tensortrace(C, A, p, q) dimcheck_tensortrace(C, A, p, q) Base.mightalias(C, A) && throw(ArgumentError("output tensor must not be aliased with input tensor")) - # directly use TBLIS types to avoid additional conversion step - # isone(β) || rmul!(C, β) # TODO: check if TBLIS handles the scaling correctly C_tblis = tblis_tensor(C, β) A_tblis = tblis_tensor(A, α) einA, einC = TensorOperations.trace_labels(p, q) @@ -108,13 +107,13 @@ function stridedtensortrace!(C::StridedView{T}, return C end -function stridedtensorcontract!(C::StridedView{T}, - A::StridedView{T}, pA::Index2Tuple, - B::StridedView{T}, pB::Index2Tuple, - pAB::Index2Tuple, - α::Number, β::Number, - backend::TBLIS, - allocator=DefaultAllocator()) where {T<:BlasFloat} +function TensorOperations.stridedtensorcontract!(C::StridedView{T}, + A::StridedView{T}, pA::Index2Tuple, + B::StridedView{T}, pB::Index2Tuple, + pAB::Index2Tuple, + α::Number, β::Number, + backend::TBLIS, + allocator=DefaultAllocator()) where {T<:BlasFloat} argcheck_tensorcontract(C, A, pA, B, pB, pAB) dimcheck_tensorcontract(C, A, pA, B, pB, pAB) diff --git a/test/methods.jl b/test/methods.jl index ede5b58..8dc23dc 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -13,8 +13,6 @@ typelist = (Float32, Float64, ComplexF32, ComplexF64) @test C1 ≈ C2 @test C2 == C3 @test C1 ≈ ncon(Any[A], Any[[-2, -4, -1, -3]]) - @test_throws IndexError tensorcopy(1:4, A, 1:3) - @test_throws IndexError tensorcopy(1:4, A, [1, 2, 2, 4]) end @testset "tensoradd" begin @@ -26,45 +24,29 @@ typelist = (Float32, Float64, ComplexF32, ComplexF64) C3 = @inferred tensoradd(A, 
((1:4...,), ()), false, B, (p, ()), false, 1, 1, b) @test C1 ≈ C2 @test C2 == C3 - @test C1 ≈ A + ncon(Any[B], Any[[-2, -4, -1, -3]]; backend=b) - @test_throws DimensionMismatch tensoradd(A, 1:4, B, 1:4) end @testset "tensortrace" begin A = randn(Float64, (50, 100, 100)) - C1 = zeros(50) - for i in 1:50 - for j in 1:100 - C1[i] += A[i, j, j] - end - end + C1 = tensortrace(A, [:a, :b, :b]) C2 = tensortrace(A, [:a, :b, :b]; backend=b) C3 = ncon(Any[A], Any[[-1, 1, 1]]; backend=b) @test C1 ≈ C2 @test C2 == C3 A = randn(Float64, (3, 20, 5, 3, 20, 4, 5)) - C1 = zeros(4, 3, 3) - for i1 in 1:4, i2 in 1:3, i3 in 1:3 - for j1 in 1:20, j2 in 1:5 - C1[i1, i2, i3] += A[i2, j1, j2, i3, j1, i1, j2] - end - end + C1 = tensortrace((:e, :a, :d), A, (:a, :b, :c, :d, :b, :e, :c)) C2 = @inferred tensortrace((:e, :a, :d), A, (:a, :b, :c, :d, :b, :e, :c); backend=b) C3 = @inferred tensortrace(A, ((6, 1, 4), ()), ((2, 3), (5, 7)), false, 1.0, b) C4 = ncon(Any[A], Any[[-2, 1, 2, -3, 1, -1, 2]]; backend=b) @test C1 ≈ C2 @test C2 == C3 == C4 - @test_throws IndexError tensortrace(randn(2, 2, 2, 2, 2, 2, 2), ((1,), (3, 2)), - ((1, 5), (2, 6)), false) end @testset "tensorcontract" begin A = randn(T, (3, 20, 5, 3, 4)) B = randn(T, (5, 6, 20, 3)) - C1 = zeros(T, (3, 3, 4, 3, 6)) - for a in 1:3, b in 1:20, c in 1:5, d in 1:3, e in 1:4, f in 1:6, g in 1:3 - C1[a, g, e, d, f] += A[a, b, c, d, e] * B[c, f, b, g] - end + C1 = tensorcontract((:a, :g, :e, :d, :f), A, (:a, :b, :c, :d, :e), B, + (:c, :f, :b, :g)) C2 = @inferred tensorcontract((:a, :g, :e, :d, :f), A, (:a, :b, :c, :d, :e), B, (:c, :f, :b, :g); backend=b) @@ -78,8 +60,6 @@ typelist = (Float32, Float64, ComplexF32, ComplexF64) @test C1 ≈ C2 @test C2 == C3 == C4 == C5 - @test_throws IndexError tensorcontract(A, [:a, :b, :c, :d], B, [:c, :f, :b, :g]) - @test_throws IndexError tensorcontract(A, [:a, :b, :c, :a, :e], B, [:c, :f, :b, :g]) end @testset "tensorproduct" begin @@ -90,17 +70,10 @@ typelist = (Float32, Float64, ComplexF32, ComplexF64) A, (1, 2, 3, 4), B, (5, 6, 7, 8); backend=b)), (5 * 5 * 5 * 5, 5 * 5 * 5 * 5)) @test C1 ≈ C2 - @test_throws IndexError tensorproduct(A, [:a, :b, :c, :d], - B, [:d, :e, :f, :g]) - @test_throws IndexError tensorproduct([:a, :b, :c, :d, :e, :f, :g, :i], - A, [:a, :b, :c, :d], B, [:e, :f, :g, :h]) A = rand(1, 2) B = rand(4, 5) - C1 = zeros(T, (2, 4, 1, 5)) - for i in axes(C1, 1), j in axes(C1, 2), k in axes(C1, 3), l in axes(C1, 4) - C1[i, j, k, l] = A[k, i] * B[j, l] - end + C1 = tensorcontract((-1, -2, -3, -4), A, (-3, -1), false, B, (-2, -4), false) C2 = tensorcontract((-1, -2, -3, -4), A, (-3, -1), false, B, (-2, -4), false; backend=b) C3 = tensorproduct(A, ((1, 2), ()), false, B, ((), (1, 2)), false, @@ -141,12 +114,11 @@ end p = (3, 1, 4, 2) Cbig = zeros(ComplexF32, (50, 50, 50, 50)) C = view(Cbig, 13 .+ (0:6), 11 .+ 4 * (0:9), 15 .+ 4 * (0:8), 4 .+ 3 * (0:6)) - Acopy = tensorcopy(p, A, 1:4) Ccopy = tensorcopy(1:4, C, 1:4) α = randn(ComplexF32) β = randn(ComplexF32) tensoradd!(C, A, (p, ()), false, α, β, b) - Ccopy = β * Ccopy + α * Acopy + tensoradd!(Ccopy, A, (p, ()), false, α, β) # default backend @test C ≈ Ccopy @test_throws IndexError tensoradd!(C, A, ((1, 2, 3), ()), false, 1.2, 0.5, b) @test_throws DimensionMismatch tensoradd!(C, A, ((1, 2, 3, 4), ()), false, 1.2, 0.5, @@ -164,10 +136,7 @@ end α = randn(Float64) β = randn(Float64) tensortrace!(B, A, ((2, 3), ()), ((1,), (4,)), true, α, β, b) - Bcopy = β * Bcopy - for i in 1 .+ (0:8) - Bcopy += α * conj(view(A, i, :, :, i)) - end + tensortrace!(Bcopy, A, ((2, 3), ()), 
((1,), (4,)), true, α, β) # default backend @test B ≈ Bcopy @test_throws IndexError tensortrace!(B, A, ((1,), ()), ((2,), (3,)), false, α, β, b) @test_throws DimensionMismatch tensortrace!(B, A, ((1, 4), ()), ((2,), (3,)), false, @@ -178,6 +147,7 @@ end α, β, b) end + bref = TensorOperations.DefaultBackend() # reference backend @testset "tensorcontract! with allocator = $allocator" for allocator in (DefaultAllocator(), ManualAllocator()) @@ -192,36 +162,24 @@ end Ccopy = tensorcopy(C, 1:3) α = randn(ComplexF64) β = randn(ComplexF64) - Ccopy = β * Ccopy - for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) - for b in 1 .+ (0:14), c in 1 .+ (0:6) - Ccopy[d, a, e] += α * A[a, b, c, d] * conj(B[c, e, b]) - end - end tensorcontract!(C, A, ((4, 1), (2, 3)), false, B, ((3, 1), (2,)), true, ((1, 2, 3), ()), α, β, b, allocator) + tensorcontract!(Ccopy, A, ((4, 1), (2, 3)), false, B, ((3, 1), (2,)), true, + ((1, 2, 3), ()), α, β, bref, allocator) @test C ≈ Ccopy Ccopy = tensorcopy(C, 1:3) - Ccopy = β * Ccopy - for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) - for b in 1 .+ (0:14), c in 1 .+ (0:6) - Ccopy[d, a, e] += α * conj(A[a, b, c, d]) * conj(B[c, e, b]) - end - end tensorcontract!(C, A, ((4, 1), (2, 3)), true, B, ((3, 1), (2,)), true, ((1, 2, 3), ()), α, β, b, allocator) + tensorcontract!(Ccopy, A, ((4, 1), (2, 3)), true, B, ((3, 1), (2,)), true, + ((1, 2, 3), ()), α, β, bref, allocator) @test C ≈ Ccopy Ccopy = tensorcopy(C, 1:3) - Ccopy = β * Ccopy - for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) - for b in 1 .+ (0:14), c in 1 .+ (0:6) - Ccopy[d, a, e] += α * conj(A[a, b, c, d]) * B[c, e, b] - end - end tensorcontract!(C, A, ((4, 1), (2, 3)), true, B, ((3, 1), (2,)), false, ((1, 2, 3), ()), α, β, b, allocator) + tensorcontract!(Ccopy, A, ((4, 1), (2, 3)), true, B, ((3, 1), (2,)), false, + ((1, 2, 3), ()), α, β, bref, allocator) @test C ≈ Ccopy @test_throws IndexError tensorcontract!(C, diff --git a/test/tensor.jl b/test/tensor.jl index a9255f1..74b0b80 100644 --- a/test/tensor.jl +++ b/test/tensor.jl @@ -8,12 +8,6 @@ b = TBLIS() C1 = permutedims(A, p) @tensor backend = b C2[4, 1, 3, 2] := A[1, 2, 3, 4] @test C1 ≈ C2 - @test_throws IndexError begin - @tensor C[1, 2, 3, 4] := A[1, 2, 3] - end - @test_throws IndexError begin - @tensor C[1, 2, 3, 4] := A[1, 2, 2, 4] - end B = randn(Float64, (5, 6, 3, 4)) p = [3, 1, 4, 2] @@ -26,42 +20,26 @@ b = TBLIS() A = randn(Float64, (50, 100, 100)) @tensor backend = b C1[a] := A[a, b', b'] - C2 = zeros(50) - for i in 1:50 - for j in 1:100 - C2[i] += A[i, j, j] - end - end + @tensor C2[a] := A[a, b', b'] @test C1 ≈ C2 + A = randn(Float64, (3, 20, 5, 3, 20, 4, 5)) @tensor backend = b C1[e, a, d] := A[a, b, c, d, b, e, c] - C2 = zeros(4, 3, 3) - for i1 in 1:4, i2 in 1:3, i3 in 1:3 - for j1 in 1:20, j2 in 1:5 - C2[i1, i2, i3] += A[i2, j1, j2, i3, j1, i1, j2] - end - end + @tensor C2[e, a, d] := A[a, b, c, d, b, e, c] @test C1 ≈ C2 A = randn(Float64, (3, 20, 5, 3, 4)) B = randn(Float64, (5, 6, 20, 3)) @tensor backend = b C1[a, g, e, d, f] := A[a, b, c, d, e] * B[c, f, b, g] - C2 = zeros(3, 3, 4, 3, 6) - for a in 1:3, b in 1:20, c in 1:5, d in 1:3, e in 1:4, f in 1:6, g in 1:3 - C2[a, g, e, d, f] += A[a, b, c, d, e] * B[c, f, b, g] - end + @tensor C2[a, g, e, d, f] := A[a, b, c, d, e] * B[c, f, b, g] @test C1 ≈ C2 - @test_throws IndexError begin - @tensor backend = b A[a, b, c, d] * B[c, f, b, g] - end end @testset "tensorcontract 2" begin A = randn(ComplexF32, (5, 5, 5, 5)) B = rand(ComplexF32, (5, 5, 5, 5)) @tensor backend = b C1[1, 2, 5, 6, 3, 4, 
7, 8] := A[1, 2, 3, 4] * B[5, 6, 7, 8] - C2 = reshape(kron(reshape(B, (25, 25)), reshape(A, (25, 25))), - (5, 5, 5, 5, 5, 5, 5, 5)) + @tensor C2[1, 2, 5, 6, 3, 4, 7, 8] := A[1, 2, 3, 4] * B[5, 6, 7, 8] @test C1 ≈ C2 @test_throws IndexError begin @tensor backend = b C[a, b, c, d, e, f, g, i] := A[a, b, c, d] * B[e, f, g, h] @@ -75,13 +53,7 @@ end C = rand(ComplexF64, (Dd, Dh, Df)) @tensor backend = b D1[d, f, h] := A[a, c, f, a, e, b, b, g] * B[c, h, g, e, d] + 0.5 * C[d, h, f] - D2 = zeros(ComplexF64, (Dd, Df, Dh)) - for d in 1:Dd, f in 1:Df, h in 1:Dh - D2[d, f, h] += 0.5 * C[d, h, f] - for a in 1:Da, b in 1:Db, c in 1:Dc, e in 1:De, g in 1:Dg - D2[d, f, h] += A[a, c, f, a, e, b, b, g] * B[c, h, g, e, d] - end - end + @tensor D2[d, f, h] := A[a, c, f, a, e, b, b, g] * B[c, h, g, e, d] + 0.5 * C[d, h, f] @test D1 ≈ D2 @test norm(vec(D1)) ≈ sqrt(abs((@tensor backend = b tensorscalar(D1[d, f, h] * conj(D1[d, f, h]))))) @@ -93,20 +65,10 @@ end A = view(Abig, 1 .+ 3 .* (0:9), 2 .+ 2 .* (0:6), 5 .+ 4 .* (0:6), 4 .+ 3 .* (0:8)) Cbig = zeros(Float32, (50, 50, 50, 50)) C = view(Cbig, 13 .+ (0:6), 11 .+ 4 .* (0:9), 15 .+ 4 .* (0:8), 4 .+ 3 .* (0:6)) - Acopy = copy(A) Ccopy = copy(C) @tensor backend = b C[3, 1, 4, 2] = A[1, 2, 3, 4] - @tensor backend = b Ccopy[3, 1, 4, 2] = Acopy[1, 2, 3, 4] + @tensor Ccopy[3, 1, 4, 2] = A[1, 2, 3, 4] @test C ≈ Ccopy - @test_throws IndexError begin - @tensor C[3, 1, 4, 2] = A[1, 2, 3] - end - @test_throws DimensionMismatch begin - @tensor C[3, 1, 4, 2] = A[3, 1, 4, 2] - end - @test_throws IndexError begin - @tensor C[1, 1, 2, 3] = A[1, 2, 3, 4] - end end @testset "views 2" begin @@ -115,22 +77,11 @@ end A = view(Abig, 1 .+ 3 .* (0:9), 2 .+ 2 .* (0:6), 5 .+ 4 .* (0:6), 4 .+ 3 .* (0:8)) Cbig = zeros(ComplexF64, (50, 50, 50, 50)) C = view(Cbig, 13 .+ (0:6), 11 .+ 4 .* (0:9), 15 .+ 4 .* (0:8), 4 .+ 3 .* (0:6)) - Acopy = permutedims(copy(A), p) - Ccopy = copy(C) α = randn(Float64) β = randn(Float64) @tensor backend = b D[3, 1, 4, 2] := β * C[3, 1, 4, 2] + α * A[1, 2, 3, 4] - Dcopy = β * Ccopy + α * Acopy + @tensor Dcopy[3, 1, 4, 2] := β * C[3, 1, 4, 2] + α * A[1, 2, 3, 4] @test D ≈ Dcopy - @test_throws IndexError begin - @tensor D[3, 1, 4, 2] = 0.5 * C[3, 1, 4, 2] + 1.2 * A[1, 2, 3] - end - @test_throws DimensionMismatch begin - @tensor D[3, 1, 4, 2] = 0.5 * C[3, 1, 4, 2] + 1.2 * A[3, 1, 2, 4] - end - @test_throws IndexError begin - @tensor D[1, 1, 2, 3] = 0.5 * C[1, 1, 2, 3] + 1.2 * A[1, 2, 3, 4] - end end @testset "views 3" begin @@ -138,26 +89,11 @@ end A = view(Abig, 1 .+ 3 .* (0:8), 2 .+ 2 .* (0:14), 5 .+ 4 .* (0:6), 7 .+ 2 .* (0:8)) Bbig = rand(Float64, (50, 50)) B = view(Bbig, 13 .+ (0:14), 3 .+ 5 .* (0:6)) - Acopy = copy(A) Bcopy = copy(B) α = randn(Float64) @tensor backend = b B[b, c] += α * A[a, b, c, a] - for i in 1 .+ (0:8) - Bcopy += α * view(A, i, :, :, i) - end + @tensor Bcopy[b, c] += α * A[a, b, c, a] @test B ≈ Bcopy - @test_throws IndexError begin - @tensor B[b, c] += α * A[a, b, c] - end - @test_throws DimensionMismatch begin - @tensor B[c, b] += α * A[a, b, c, a] - end - @test_throws IndexError begin - @tensor B[c, b] += α * A[a, b, a, a] - end - @test_throws DimensionMismatch begin - @tensor B[c, b] += α * A[a, b, a, c] - end end @testset "views 4" begin @@ -167,16 +103,10 @@ end B = view(Bbig, 3 .+ 5 .* (0:6), 7 .+ 2 .* (0:7), 13 .+ (0:14)) Cbig = rand(ComplexF64, (40, 40, 40)) C = view(Cbig, 3 .+ 2 .* (0:8), 13 .+ (0:8), 7 .+ 3 .* (0:7)) - Acopy = copy(A) - Bcopy = copy(B) Ccopy = copy(C) α = randn(Float64) - for d in 1 .+ (0:8), a in 1 .+ (0:8), e 
in 1 .+ (0:7) - for b in 1 .+ (0:14), c in 1 .+ (0:6) - Ccopy[d, a, e] -= α * A[a, b, c, d] * conj(B[c, e, b]) - end - end @tensor backend = b C[d, a, e] -= α * A[a, b, c, d] * conj(B[c, e, b]) + @tensor Ccopy[d, a, e] -= α * A[a, b, c, d] * conj(B[c, e, b]) @test C ≈ Ccopy end @@ -189,25 +119,9 @@ end Cbig = rand(ComplexF32, (40, 40, 40)) C = view(Cbig, 3 .+ 2 .* (0:8), 13 .+ (0:8), 7 .+ 3 .* (0:7)) Ccopy = copy(C) - for d in 1 .+ (0:8), a in 1 .+ (0:8), e in 1 .+ (0:7) - for b in 1 .+ (0:14), c in 1 .+ (0:6) - Ccopy[d, a, e] += α * A[a, b, c, d] * conj(B[c, e, b]) - end - end @tensor backend = b C[d, a, e] += α * A[a, b, c, d] * conj(B[c, e, b]) + @tensor Ccopy[d, a, e] += α * A[a, b, c, d] * conj(B[c, e, b]) @test C ≈ Ccopy - @test_throws IndexError begin - @tensor C[d, a, e] += α * A[a, b, c, a] * B[c, e, b] - end - @test_throws IndexError begin - @tensor C[d, a, e] += α * A[a, b, c, d] * B[c, b] - end - @test_throws IndexError begin - @tensor C[d, e] += α * A[a, b, c, d] * B[c, e, b] - end - @test_throws DimensionMismatch begin - @tensor C[d, e, a] += α * A[a, b, c, d] * B[c, e, b] - end end # Simple function example @@ -228,7 +142,7 @@ end B = randn(5, 5, 5) C = randn(5, 5, 5) D = zeros(5, 5, 5) - @tensor begin + @tensor backend = b begin D[a, b, c] = A[a, e, f, c, f, g] * B[g, b, e] + α * C[c, a, b] E[a, b, c] := A[a, e, f, c, f, g] * B[g, b, e] + α * C[c, a, b] end @@ -246,12 +160,8 @@ scalartypelist = (Float32, Float64, ComplexF32, ComplexF64) rhoR = rand(T, D3, D3) .- 1 // 2 H = rand(T, d1, d2, d1, d2) .- 1 // 2 A12 = reshape(reshape(A1, D1 * d1, D2) * reshape(A2, D2, d2 * D3), (D1, d1, d2, D3)) - rA12 = reshape(reshape(rhoL * reshape(A12, (D1, d1 * d2 * D3)), - (D1 * d1 * d2, D3)) * rhoR, (D1, d1, d2, D3)) - HrA12 = permutedims(reshape(reshape(H, (d1 * d2, d1 * d2)) * - reshape(permutedims(rA12, (2, 3, 1, 4)), - (d1 * d2, D1 * D3)), (d1, d2, D1, D3)), - (3, 1, 2, 4)) + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * A1[a', t1, b] * A2[b, t2, c'] * + rhoR[c', c] * H[s1, s2, t1, t2] E = dot(A12, HrA12) @tensor backend = b HrA12′[a, s1, s2, c] := rhoL[a, a'] * A1[a', t1, b] * A2[b, t2, c'] * @@ -263,23 +173,25 @@ scalartypelist = (Float32, Float64, ComplexF32, ComplexF64) HrA12′′′[a, s1, s2, c] := rhoL[a, a'] * H[s1, s2, t1, t2] * A2[b, t2, c'] * rhoR[c', c] * A1[a', t1, b] # should be contracted in exactly same order end - @tensoropt HrA12′′′′[:] := rhoL[-1, 1] * H[-2, -3, 4, 5] * A2[2, 5, 3] * - rhoR[3, -4] * A1[1, 4, 2] + @tensor backend = b opt = true HrA12′′′′[:] := rhoL[-1, 1] * H[-2, -3, 4, 5] * + A2[2, 5, 3] * + rhoR[3, -4] * A1[1, 4, 2] @test HrA12′ == HrA12′′ == HrA12′′′ # should be exactly equal @test HrA12 ≈ HrA12′ @test HrA12 ≈ HrA12′′′′ @test HrA12′′ ≈ ncon([rhoL, H, A2, rhoR, A1], - [[-1, 1], [-2, -3, 4, 5], [2, 5, 3], [3, -4], [1, 4, 2]]) + [[-1, 1], [-2, -3, 4, 5], [2, 5, 3], [3, -4], [1, 4, 2]]; backend=b) @test HrA12′′ == @ncon([rhoL, H, A2, rhoR, A1], [[-1, 1], [-2, -3, 4, 5], [2, 5, 3], [3, -4], [1, 4, 2]]; order=[1, 2, 3, 4, 5], output=[-1, -2, -3, -4], backend=b) @test E ≈ - @tensor tensorscalar(rhoL[a', a] * A1[a, s, b] * A2[b, s', c] * rhoR[c, c'] * - H[t, t', s, s'] * conj(A1[a', t, b']) * - conj(A2[b', t', c'])) + @tensor backend = b tensorscalar(rhoL[a', a] * A1[a, s, b] * A2[b, s', c] * + rhoR[c, c'] * + H[t, t', s, s'] * conj(A1[a', t, b']) * + conj(A2[b', t', c'])) @test E ≈ @ncon([rhoL, A1, A2, rhoR, H, conj(A1), conj(A2)], [[5, 1], [1, 2, 3], [3, 4, 9], [9, 10], [6, 8, 2, 4], [5, 6, 7], - [7, 8, 10]]) + [7, 8, 10]]; backend=b) # this implicitly tests 
that `ncon` returns a scalar if no open indices
end

From d2f747c929e4aacce710b04471b9adc6a0bc4a0b Mon Sep 17 00:00:00 2001
From: Jutho
Date: Sat, 2 Nov 2024 23:04:42 +0100
Subject: [PATCH 3/7] add TBLIS docstring

---
 src/TensorOperationsTBLIS.jl | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/TensorOperationsTBLIS.jl b/src/TensorOperationsTBLIS.jl
index a07063b..24b3715 100644
--- a/src/TensorOperationsTBLIS.jl
+++ b/src/TensorOperationsTBLIS.jl
@@ -23,7 +23,14 @@ set_num_tblis_threads(n) = LibTBLIS.tblis_set_num_threads(convert(Cuint, n))
 
 # TensorOperations
 #------------------
-
+"""
+    TBLIS()
+
+Backend for tensor operations on strided arrays (arrays that can be cast into a `StridedView`)
+that uses the [TBLIS](https://github.com/devinamatthews/tblis) library. This library can perform
+tensor contractions without additional intermediate tensors. It does not use LinearAlgebra
+or BLAS.
+"""
 struct TBLIS <: TensorOperations.AbstractBackend end
 
 Base.@deprecate(tblisBackend(), TBLIS())

From 0fbff61d356e91ce886998c90bdb394677da7f08 Mon Sep 17 00:00:00 2001
From: Lukas Devos
Date: Sun, 3 Nov 2024 07:53:02 -0500
Subject: [PATCH 4/7] Update author email

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 70ed301..05955a0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "TensorOperationsTBLIS"
 uuid = "1e289f0c-8058-4c3e-8acf-f8ef036bd865"
-authors = ["Lukas Devos ", "Jutho Haegeman "]
+authors = ["Lukas Devos ", "Jutho Haegeman "]
 version = "0.3.0"
 
 [deps]

From a079d1f05919fda6d8e15090e010f1a8dbae327f Mon Sep 17 00:00:00 2001
From: Lukas Devos
Date: Sun, 3 Nov 2024 07:58:27 -0500
Subject: [PATCH 5/7] Rename `get_num_tblis_threads` to unexported `get_num_threads`

---
 src/TensorOperationsTBLIS.jl | 9 ++++-----
 test/runtests.jl             | 8 ++++----
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/TensorOperationsTBLIS.jl b/src/TensorOperationsTBLIS.jl
index 24b3715..7a6b66d 100644
--- a/src/TensorOperationsTBLIS.jl
+++ b/src/TensorOperationsTBLIS.jl
@@ -16,10 +16,9 @@ using .LibTBLIS
 using .LibTBLIS: LibTBLIS, len_type, stride_type
 
 export TBLIS
-export get_num_tblis_threads, set_num_tblis_threads
 
-get_num_tblis_threads() = convert(Int, LibTBLIS.tblis_get_num_threads())
-set_num_tblis_threads(n) = LibTBLIS.tblis_set_num_threads(convert(Cuint, n))
+get_num_threads() = convert(Int, LibTBLIS.tblis_get_num_threads())
+set_num_threads(n) = LibTBLIS.tblis_set_num_threads(convert(Cuint, n))
 
 # TensorOperations
 #------------------
@@ -34,8 +33,8 @@ or BLAS.
struct TBLIS <: TensorOperations.AbstractBackend end Base.@deprecate(tblisBackend(), TBLIS()) -Base.@deprecate(tblis_get_num_threads(), get_num_tblis_threads()) -Base.@deprecate(tblis_set_num_threads(n), set_num_tblis_threads(n)) +Base.@deprecate(tblis_get_num_threads(), get_num_threads()) +Base.@deprecate(tblis_set_num_threads(n), set_num_threads(n)) include("strided.jl") diff --git a/test/runtests.jl b/test/runtests.jl index 8e76130..74de148 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,16 +5,16 @@ using Test, Random, LinearAlgebra Random.seed!(1234567) @testset "TensorOperationsTBLIS.jl" begin - set_num_tblis_threads(1) + TensorOperationsTBLIS.set_num_threads(1) @testset "method syntax" verbose = true begin include("methods.jl") end - @test get_num_tblis_threads() == 1 - set_num_tblis_threads(2) + @test TensorOperationsTBLIS.get_num_threads() == 1 + TensorOperationsTBLIS.set_num_threads(2) @testset "macro with index notation" verbose = true begin include("tensor.jl") end - @test get_num_tblis_threads() == 2 + @test TensorOperationsTBLIS.get_num_threads() == 2 end From e3ef8952bab777d796f6eac0f5651db3d8ec1ccd Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sun, 3 Nov 2024 08:02:07 -0500 Subject: [PATCH 6/7] Update README --- README.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 90ce28d..1342a9d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # TensorOperationsTBLIS.jl -Julia wrapper for [tblis](https://github.com/devinamatthews/tblis) with [TensorOperations.jl](https://github.com/Jutho/TensorOperations.jl). +Julia wrapper for [TBLIS](https://github.com/devinamatthews/tblis) with [TensorOperations.jl](https://github.com/Jutho/TensorOperations.jl). [![CI][ci-img]][ci-url] [![CI (Julia nightly)][ci-julia-nightly-img]][ci-julia-nightly-url] [![][codecov-img]][codecov-url] @@ -13,15 +13,14 @@ Julia wrapper for [tblis](https://github.com/devinamatthews/tblis) with [TensorO [codecov-img]: https://codecov.io/gh/lkdvos/TensorOperationsTBLIS.jl/graph/badge.svg?token=R86L0S70VT [codecov-url]: https://codecov.io/gh/lkdvos/TensorOperationsTBLIS.jl -Currently provides implementations of `tensorcontract!`, `tensoradd!` and `tensortrace!` for -`StridedArray{<:BlasFloat}`. These can be accessed through the backend system of -TensorOperations, i.e. +Currently provides implementations of `tensorcontract!`, `tensoradd!` and `tensortrace!` for array types compatible with Strided.jl, i.e. `StridedView{<:BlasFloat}`. +These can be accessed through the backend system of TensorOperations, i.e. ```julia using TensorOperations using TensorOperationsTBLIS -tblisbackend = tblisBackend() +tblisbackend = TBLIS() α = randn() A = randn(5, 5, 5, 5, 5, 5) B = randn(5, 5, 5) @@ -34,17 +33,16 @@ D = zeros(5, 5, 5) end ``` -Additionally, the number of threads used by tblis can be set by: +Additionally, the number of threads used by TBLIS can be set by: ```julia -using TensorOperationsTBLIS -tblis_set_num_threads(4) -@show tblis_get_num_threads() +TensorOperationsTBLIS.set_num_threads(4) +@show TensorOperationsTBLIS.get_num_threads() ``` ## Notes -- This implementation of tblis for TensorOperations.jl is only supported from v5 of +- This implementation of TBLIS for TensorOperations.jl is only supported from v5 of TensorOperations.jl onwards. For v4, an earlier version of this package exists. 
For older versions, you could look for [BliContractor.jl](https://github.com/xrq-phys/BliContractor.jl) or From efc9fec14a6fd648794ad89d09716ecbb6a75c10 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sun, 3 Nov 2024 08:10:49 -0500 Subject: [PATCH 7/7] Add codecov ignore for `lib` --- .github/codecov.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .github/codecov.yml diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 0000000..6201ea2 --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,2 @@ +ignore: + - "src/lib" \ No newline at end of file
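Taken together, the series leaves the package exporting only the `TBLIS` backend type, with thread control moved to unexported, module-qualified functions. Below is a minimal end-to-end sketch of the post-overhaul usage, pieced together from the updated README and the calls exercised in `test/tensor.jl`; the tensor shapes and index labels are illustrative choices, not taken from the repository.

```julia
using TensorOperations        # provides @tensor and ncon
using TensorOperationsTBLIS   # provides the TBLIS backend
using LinearAlgebra           # for ≈ (isapprox) on arrays

b = TBLIS()

# Thread control is unexported after PATCH 5, so qualify it with the module name.
TensorOperationsTBLIS.set_num_threads(4)
@show TensorOperationsTBLIS.get_num_threads()

# Illustrative pairwise contraction of two 4-index tensors over two indices.
A = randn(5, 5, 5, 5)
B = randn(5, 5, 5, 5)

# `opt = true` requests an optimized contraction order, as in the HrA12′′′′ test case.
@tensor backend = b opt = true C[a, b, e, f] := A[a, b, c, d] * B[c, d, e, f]

# The same contraction through `ncon`, passing the backend as a keyword argument;
# negative integers label the open indices of the result.
C′ = ncon([A, B], [[-1, -2, 1, 2], [1, 2, -3, -4]]; backend=b)
@assert C ≈ C′
```

During the transition, the deprecations from PATCH 5 keep the old names working: `tblisBackend()`, `tblis_set_num_threads`, and `tblis_get_num_threads` still resolve, but emit a deprecation warning pointing at `TBLIS()` and the new thread functions.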