From aec5d4c0fa7f98d5f010681b0139e9835eccb890 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 19 Sep 2024 19:23:18 -0400 Subject: [PATCH 01/15] Mostly a todo list --- .../ITensorsVectorInterfaceExt.jl | 7 ++++++- src/broadcast.jl | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ext/ITensorsVectorInterfaceExt/ITensorsVectorInterfaceExt.jl b/ext/ITensorsVectorInterfaceExt/ITensorsVectorInterfaceExt.jl index eb4026fdb1..f4ee4a69a2 100644 --- a/ext/ITensorsVectorInterfaceExt/ITensorsVectorInterfaceExt.jl +++ b/ext/ITensorsVectorInterfaceExt/ITensorsVectorInterfaceExt.jl @@ -38,7 +38,12 @@ function VectorInterface.add(a::ITensor, b::ITensor, α::Number, β::Number) return a * β + b * α end function VectorInterface.add!(a::ITensor, b::ITensor, α::Number, β::Number) - a .= a .* β .+ b .* α + ## TODO Coding it this way skips code which fails on GPU in broadcast.jl + ## Not a permanant fix, just here until the broadcast function can be + ## updated + a .*= β + a .+= b .* α + # a .= a .* β .+ b .* α return a end function VectorInterface.add!!(a::ITensor, b::ITensor, α::Number, β::Number) diff --git a/src/broadcast.jl b/src/broadcast.jl index 3e91d64714..841b68d2f3 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -414,6 +414,10 @@ function Base.copyto!( A, C = C, A end if !isnothing(A) && !isnothing(C) && !isnothing(α) && !isnothing(β) + ## TODO this line fails, because the function (r, t) -> β * r + α * t + ## is an anonymous ITensor function which the GPU compiler cannot process + ## This code is being called in ITensorsVectorInterfaceExt but checked was missed because + map!((r, t) -> β * r + α * t, T, T, A) else bc_bc_α = find_type(Broadcasted, bc_α.args) From a907a1a575c9e8588f66ba2d350ecb439ab5078d Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 19 Sep 2024 19:30:49 -0400 Subject: [PATCH 02/15] format --- src/broadcast.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index 841b68d2f3..947080cccc 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -417,7 +417,7 @@ function Base.copyto!( ## TODO this line fails, because the function (r, t) -> β * r + α * t ## is an anonymous ITensor function which the GPU compiler cannot process ## This code is being called in ITensorsVectorInterfaceExt but checked was missed because - + map!((r, t) -> β * r + α * t, T, T, A) else bc_bc_α = find_type(Broadcasted, bc_α.args) From 78684480ff99ef6dd34e6028ab6259459965ea95 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 7 Oct 2024 11:01:13 -0400 Subject: [PATCH 03/15] Revert changes --- .../ITensorsVectorInterfaceExt.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ext/ITensorsVectorInterfaceExt/ITensorsVectorInterfaceExt.jl b/ext/ITensorsVectorInterfaceExt/ITensorsVectorInterfaceExt.jl index f4ee4a69a2..eb4026fdb1 100644 --- a/ext/ITensorsVectorInterfaceExt/ITensorsVectorInterfaceExt.jl +++ b/ext/ITensorsVectorInterfaceExt/ITensorsVectorInterfaceExt.jl @@ -38,12 +38,7 @@ function VectorInterface.add(a::ITensor, b::ITensor, α::Number, β::Number) return a * β + b * α end function VectorInterface.add!(a::ITensor, b::ITensor, α::Number, β::Number) - ## TODO Coding it this way skips code which fails on GPU in broadcast.jl - ## Not a permanant fix, just here until the broadcast function can be - ## updated - a .*= β - a .+= b .* α - # a .= a .* β .+ b .* α + a .= a .* β .+ b .* α return a end function VectorInterface.add!!(a::ITensor, b::ITensor, α::Number, β::Number) From ea0da01f3a871eb957b031cc6293fd30529b5ea4 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 7 Oct 2024 15:50:42 -0400 Subject: [PATCH 04/15] Attempting to fix broadcast --- src/broadcast.jl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index 947080cccc..a80380e0b5 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -414,11 +414,19 @@ function Base.copyto!( A, C = C, A end if !isnothing(A) && !isnothing(C) && !isnothing(α) && !isnothing(β) - ## TODO this line fails, because the function (r, t) -> β * r + α * t - ## is an anonymous ITensor function which the GPU compiler cannot process - ## This code is being called in ITensorsVectorInterfaceExt but checked was missed because - map!((r, t) -> β * r + α * t, T, T, A) + #map!((r, t) -> β * r + α * t, T, T, A) + ## I tried to make a closure like this but + ## This throws an error still during GPU compiling. + # f1 = + + # f2 = * + # map!((r, t) -> f1(f2(β, r), f2(α, t)), T, T, A) + ## The only way I could get this to work was by + ## Splitting it into two calls + ## The original call made 51 allocations on CPU and this new + ## version makes 44 allocations + T .= β .* T + T .+= α .* A else bc_bc_α = find_type(Broadcasted, bc_α.args) if isnothing(α) From 5aefd89383f7bcb01d467cb276ec6334c5bbf0f4 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 8 Oct 2024 10:15:22 -0400 Subject: [PATCH 05/15] create a closure object to force alpha and beta to be bits --- src/broadcast.jl | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index a80380e0b5..59a29c574d 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -395,6 +395,13 @@ end # C .= β .* C .+ α .* A .* B # +struct axpby{T} + alpha::T + beta::T +end + +(xy::axpby)(x, y) = x * xy.alpha + y * xy.beta + ## TODO this code doesn't actually get called function Base.copyto!( T::ITensor, @@ -416,17 +423,9 @@ function Base.copyto!( if !isnothing(A) && !isnothing(C) && !isnothing(α) && !isnothing(β) #map!((r, t) -> β * r + α * t, T, T, A) - ## I tried to make a closure like this but - ## This throws an error still during GPU compiling. - # f1 = + - # f2 = * - # map!((r, t) -> f1(f2(β, r), f2(α, t)), T, T, A) - ## The only way I could get this to work was by - ## Splitting it into two calls - ## The original call made 51 allocations on CPU and this new - ## version makes 44 allocations - T .= β .* T - T .+= α .* A + + ab = axpby{promote_type(typeof(α), typeof(β))}(α, β) + map!((r,t) -> ab(r, t), T, T, A) else bc_bc_α = find_type(Broadcasted, bc_α.args) if isnothing(α) From 359a86a77b57a6059adb1fe4e2ca32ff26ec634c Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 8 Oct 2024 10:20:10 -0400 Subject: [PATCH 06/15] Make alpha/beta bits type fo fix GPU --- src/broadcast.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index 59a29c574d..0e54dc19e7 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -422,10 +422,10 @@ function Base.copyto!( end if !isnothing(A) && !isnothing(C) && !isnothing(α) && !isnothing(β) - #map!((r, t) -> β * r + α * t, T, T, A) + # map!((r, t) -> β * r + α * t, T, T, A) ab = axpby{promote_type(typeof(α), typeof(β))}(α, β) - map!((r,t) -> ab(r, t), T, T, A) + map!((r,t) -> ab(t, r), T, T, A) else bc_bc_α = find_type(Broadcasted, bc_α.args) if isnothing(α) From cbd5162524d5d35384c91c95c5a682f0ddb56b20 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 8 Oct 2024 10:20:32 -0400 Subject: [PATCH 07/15] format --- src/broadcast.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index 0e54dc19e7..01e5840bf2 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -400,7 +400,7 @@ struct axpby{T} beta::T end -(xy::axpby)(x, y) = x * xy.alpha + y * xy.beta +(xy::axpby)(x, y) = x * xy.alpha + y * xy.beta ## TODO this code doesn't actually get called function Base.copyto!( @@ -425,7 +425,7 @@ function Base.copyto!( # map!((r, t) -> β * r + α * t, T, T, A) ab = axpby{promote_type(typeof(α), typeof(β))}(α, β) - map!((r,t) -> ab(t, r), T, T, A) + map!((r, t) -> ab(t, r), T, T, A) else bc_bc_α = find_type(Broadcasted, bc_α.args) if isnothing(α) From 4e5aae142dcebcc55ba2daa72f442ed646a40661 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 8 Oct 2024 11:50:40 -0400 Subject: [PATCH 08/15] Make axpby more flexible --- src/broadcast.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index 01e5840bf2..11c003a346 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -395,9 +395,9 @@ end # C .= β .* C .+ α .* A .* B # -struct axpby{T} - alpha::T - beta::T +struct axpby{Alpha, Beta} + alpha::Alpha + beta::Beta end (xy::axpby)(x, y) = x * xy.alpha + y * xy.beta From 3e0c4d03918b6178cf485c01b6390ba9fbbc75f7 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 8 Oct 2024 11:51:07 -0400 Subject: [PATCH 09/15] format --- src/broadcast.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index 11c003a346..e289555045 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -395,7 +395,7 @@ end # C .= β .* C .+ α .* A .* B # -struct axpby{Alpha, Beta} +struct axpby{Alpha,Beta} alpha::Alpha beta::Beta end From a685ba8fd7e292e210d4d74bd5028279b5565c0d Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 8 Oct 2024 19:30:05 -0400 Subject: [PATCH 10/15] Cleanup and make axpby a function --- src/broadcast.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index e289555045..b53e9b7ccb 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -395,12 +395,12 @@ end # C .= β .* C .+ α .* A .* B # -struct axpby{Alpha,Beta} +struct axpby{Alpha,Beta} <: Function alpha::Alpha beta::Beta end -(xy::axpby)(x, y) = x * xy.alpha + y * xy.beta +(f::axpby)(y, x) = x * f.alpha + y * f.beta ## TODO this code doesn't actually get called function Base.copyto!( @@ -424,8 +424,7 @@ function Base.copyto!( # map!((r, t) -> β * r + α * t, T, T, A) - ab = axpby{promote_type(typeof(α), typeof(β))}(α, β) - map!((r, t) -> ab(t, r), T, T, A) + map!(axpby(α, β), T, T, A) else bc_bc_α = find_type(Broadcasted, bc_α.args) if isnothing(α) From b813e99fee8da5008672f7be47991d94f4ff6e90 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Fri, 11 Oct 2024 15:21:37 -0400 Subject: [PATCH 11/15] add tests --- NDTensors/test/test_dense.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/NDTensors/test/test_dense.jl b/NDTensors/test/test_dense.jl index 94c52f4132..623591439d 100644 --- a/NDTensors/test/test_dense.jl +++ b/NDTensors/test/test_dense.jl @@ -83,6 +83,20 @@ NDTensors.dim(i::MyInd) = i.dim @test A[2, 2] == Aview[1, 1] end + ## Testing A .= α .* B .+ β .* A + C = copy(A) + @allowscalar fill!(B, zero(elt)) + β = elt(2.0) + α = elt(1.0) + permutedims!!(A, B, (1,2), (a,b) -> +(*(β, a), *(α, b))) + @allowscalar 2.0 .* C == A + randn!(B) + C = copy(A) + A = permutedims!!(A, B, (1,2), (a,b) -> +(*(β, a), *(α, b))) + @allowscalar for i in 1:3, j in 1:4 + @test A[i,j] == α * B[i,j] + β * C[i,j] + end + ## add elt around 2.0 to preserve the eltype of A. @test data(A * elt(2.0)) == data(elt(2.0) * A) From 3b6cfc54cccf12998565965eadb78b91bd7caa4b Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Fri, 11 Oct 2024 15:21:50 -0400 Subject: [PATCH 12/15] format --- NDTensors/test/test_dense.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NDTensors/test/test_dense.jl b/NDTensors/test/test_dense.jl index 623591439d..fb110bc040 100644 --- a/NDTensors/test/test_dense.jl +++ b/NDTensors/test/test_dense.jl @@ -88,13 +88,13 @@ NDTensors.dim(i::MyInd) = i.dim @allowscalar fill!(B, zero(elt)) β = elt(2.0) α = elt(1.0) - permutedims!!(A, B, (1,2), (a,b) -> +(*(β, a), *(α, b))) + permutedims!!(A, B, (1, 2), (a, b) -> +(*(β, a), *(α, b))) @allowscalar 2.0 .* C == A randn!(B) C = copy(A) - A = permutedims!!(A, B, (1,2), (a,b) -> +(*(β, a), *(α, b))) + A = permutedims!!(A, B, (1, 2), (a, b) -> +(*(β, a), *(α, b))) @allowscalar for i in 1:3, j in 1:4 - @test A[i,j] == α * B[i,j] + β * C[i,j] + @test A[i, j] == α * B[i, j] + β * C[i, j] end ## add elt around 2.0 to preserve the eltype of A. From 7525f30725f847bdc995ea616b0190745b17c514 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Sat, 12 Oct 2024 13:16:07 -0400 Subject: [PATCH 13/15] Update NDTensors/test/test_dense.jl Co-authored-by: Matt Fishman --- NDTensors/test/test_dense.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NDTensors/test/test_dense.jl b/NDTensors/test/test_dense.jl index fb110bc040..80be2696fb 100644 --- a/NDTensors/test/test_dense.jl +++ b/NDTensors/test/test_dense.jl @@ -86,8 +86,8 @@ NDTensors.dim(i::MyInd) = i.dim ## Testing A .= α .* B .+ β .* A C = copy(A) @allowscalar fill!(B, zero(elt)) - β = elt(2.0) - α = elt(1.0) + β = elt(2) + α = elt(1) permutedims!!(A, B, (1, 2), (a, b) -> +(*(β, a), *(α, b))) @allowscalar 2.0 .* C == A randn!(B) From c4f014e6767c5daa15a799bf344349348e12f7c4 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Sat, 12 Oct 2024 13:16:12 -0400 Subject: [PATCH 14/15] Update NDTensors/test/test_dense.jl Co-authored-by: Matt Fishman --- NDTensors/test/test_dense.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NDTensors/test/test_dense.jl b/NDTensors/test/test_dense.jl index 80be2696fb..c2b327811a 100644 --- a/NDTensors/test/test_dense.jl +++ b/NDTensors/test/test_dense.jl @@ -89,7 +89,7 @@ NDTensors.dim(i::MyInd) = i.dim β = elt(2) α = elt(1) permutedims!!(A, B, (1, 2), (a, b) -> +(*(β, a), *(α, b))) - @allowscalar 2.0 .* C == A + @allowscalar 2 .* C == A randn!(B) C = copy(A) A = permutedims!!(A, B, (1, 2), (a, b) -> +(*(β, a), *(α, b))) From 2fd862431ebddd88d0addb6d042d0d6689407093 Mon Sep 17 00:00:00 2001 From: Matt Fishman Date: Sat, 12 Oct 2024 20:31:38 -0400 Subject: [PATCH 15/15] Update src/broadcast.jl --- src/broadcast.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index b53e9b7ccb..4f0848d580 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -421,9 +421,8 @@ function Base.copyto!( A, C = C, A end if !isnothing(A) && !isnothing(C) && !isnothing(α) && !isnothing(β) - + # The following fails to compile on some GPU backends. # map!((r, t) -> β * r + α * t, T, T, A) - map!(axpby(α, β), T, T, A) else bc_bc_α = find_type(Broadcasted, bc_α.args)