Broadcasting over BLAS1

haampie · haampie · commit 7992ed94b6a3 · 2017-12-27T19:42:49.000+01:00
diff --git a/src/bicgstabl.jl b/src/bicgstabl.jl
@@ -47,8 +47,7 @@ function bicgstabl_iterator!(x, A, b, l::Int = 2;
         copy!(residual, b)
     else
         A_mul_B!(residual, A, x)
-        @blas! residual -= one(T) * b
-        @blas! residual *= -one(T)
+        residual .= b .- residual
         mv_products += 1
     end
 
@@ -89,10 +88,7 @@ function next(it::BiCGStabIterable, iteration::Int)
         β = ρ / it.σ
         
         # us[:, 1 : j] .= rs[:, 1 : j] - β * us[:, 1 : j]
-        for i = 1 : j
-            @blas! view(it.us, :, i) *= -β
-            @blas! view(it.us, :, i) += one(T) * view(it.rs, :, i)
-        end
+        it.us[:, 1 : j] .= it.rs[:, 1 : j] .- β .* it.us[:, 1 : j]
 
         # us[:, j + 1] = Pl \ (A * us[:, j])
         next_u = view(it.us, :, j + 1)
@@ -102,18 +98,15 @@ function next(it::BiCGStabIterable, iteration::Int)
         it.σ = dot(it.r_shadow, next_u)
         α = ρ / it.σ
 
-        # rs[:, 1 : j] .= rs[:, 1 : j] - α * us[:, 2 : j + 1]
-        for i = 1 : j
-            @blas! view(it.rs, :, i) -= α * view(it.us, :, i + 1)
-        end
+        it.rs[:, 1 : j] .-= α .* it.us[:, 2 : j + 1]
         
         # rs[:, j + 1] = Pl \ (A * rs[:, j])
         next_r = view(it.rs, :, j + 1)
         A_mul_B!(next_r, it.A , view(it.rs, :, j))
         A_ldiv_B!(it.Pl, next_r)
         
         # x = x + α * us[:, 1]
-        @blas! it.x += α * view(it.us, :, 1)
+        it.x .+= α .* view(it.us, :, 1)
     end
 
     # Bookkeeping
diff --git a/src/cg.jl b/src/cg.jl
@@ -43,16 +43,15 @@ end
 function next(it::CGIterable, iteration::Int)
     # u := r + βu (almost an axpy)
     β = it.residual^2 / it.prev_residual^2
-    @blas! it.u *= β
-    @blas! it.u += one(eltype(it.u)) * it.r
+    it.u .= it.r .+ β .* it.u
 
     # c = A * u
     A_mul_B!(it.c, it.A, it.u)
     α = it.residual^2 / dot(it.u, it.c)
 
     # Improve solution and residual
-    @blas! it.x += α * it.u
-    @blas! it.r -= α * it.c
+    it.x .+= α .* it.u
+    it.r .-= α .* it.c
 
     it.prev_residual = it.residual
     it.residual = norm(it.r)
@@ -73,16 +72,15 @@ function next(it::PCGIterable, iteration::Int)
     
     # u := c + βu (almost an axpy)
     β = it.ρ / ρ_prev
-    @blas! it.u *= β
-    @blas! it.u += one(eltype(it.u)) * it.c
+    it.u .= it.c .+ β .* it.u
 
     # c = A * u
     A_mul_B!(it.c, it.A, it.u)
     α = it.ρ / dot(it.u, it.c)
 
     # Improve solution and residual
-    @blas! it.x += α * it.u
-    @blas! it.r -= α * it.c
+    it.x .+= α .* it.u
+    it.r .-= α .* it.c
 
     it.residual = norm(it.r)
 
@@ -110,7 +108,7 @@ function cg_iterator!(x, A, b, Pl = Identity();
     else
         mv_products = 1
         c = A * x
-        @blas! r -= one(eltype(x)) * c
+        r .-= c
         residual = norm(r)
         reltol = norm(b) * tol
     end
diff --git a/src/chebyshev.jl b/src/chebyshev.jl
@@ -37,17 +37,14 @@ function next(cheb::ChebyshevIterable, iteration::Int)
     else
         β = (cheb.λ_diff * cheb.α / 2) ^ 2
         cheb.α = inv(cheb.λ_avg - β)
-
-        # Almost an axpy u = c + βu
-        scale!(cheb.u, β)
-        @blas! cheb.u += one(T) * cheb.c
+        cheb.u .= cheb.c .+ β .* cheb.c
     end
 
     A_mul_B!(cheb.c, cheb.A, cheb.u)
     cheb.mv_products += 1
 
-    @blas! cheb.x += cheb.α * cheb.u
-    @blas! cheb.r -= cheb.α * cheb.c
+    cheb.x .+= cheb.α .* cheb.u
+    cheb.r .-= cheb.α .* cheb.c
 
     cheb.resnorm = norm(cheb.r)
 
@@ -73,7 +70,7 @@ function chebyshev_iterable!(x, A, b, λmin::Real, λmax::Real;
         mv_products = 0
     else
         A_mul_B!(c, A, x)
-        @blas! r -= one(T) * c
+        r .-= c
         resnorm = norm(r)
         reltol = tol * norm(b)
         mv_products = 1
diff --git a/src/gmres.jl b/src/gmres.jl
@@ -221,14 +221,14 @@ function init!(arnoldi::ArnoldiDecomp{T}, x, b, Pl, Ax; initially_zero::Bool = f
     # Potentially save one MV product
     if !initially_zero
         A_mul_B!(Ax, arnoldi.A, x)
-        @blas! first_col -= one(T) * Ax
+        first_col .-= Ax
     end
 
     A_ldiv_B!(Pl, first_col)
 
     # Normalize
     β = norm(first_col)
-    @blas! first_col *= one(T) / β
+    first_col .*= inv(β)
     β
 end
 
@@ -259,7 +259,7 @@ function update_solution!(x, y, arnoldi::ArnoldiDecomp{T}, Pr, k::Int, Ax) where
     # Computing x ← x + Pr \ (V * y) and use Ax as a work space
     A_mul_B!(Ax, view(arnoldi.V, :, 1 : k - 1), y)
     A_ldiv_B!(Pr, Ax)
-    @blas! x += one(T) * Ax
+    x .+= Ax
 end
 
 function expand!(arnoldi::ArnoldiDecomp, Pl::Identity, Pr::Identity, k::Int, Ax)
diff --git a/src/idrs.jl b/src/idrs.jl
@@ -111,32 +111,26 @@ function idrs_method!(log::ConvergenceHistory, X, op, args, C::T,
             # Solve small system and make v orthogonal to P
 
             c = LowerTriangular(M[k:s,k:s])\f[k:s]
-            @blas! V = G[k]
-            @blas! V *= c[1]
+            V .= c[1] .* G[k]
+            Q .= c[1] .* U[k]
 
-            @blas! Q = U[k]
-            @blas! Q *= c[1]
             for i = k+1:s
-                @blas! V += c[i-k+1]*G[i]
-                @blas! Q += c[i-k+1]*U[i]
+                V .+= c[i-k+1] .* G[i]
+                Q .+= c[i-k+1] .* U[i]
             end
 
             # Compute new U[:,k] and G[:,k], G[:,k] is in space G_j
+            V .= R .- V
 
-            #V = R - V
-            @blas! V *= -1.
-            @blas! V += R
-
-            @blas! U[k] = Q
-            @blas! U[k] += om*V
+            U[k] .= Q .+ om .* V
             G[k] = op(U[k], args...)
 
             # Bi-orthogonalise the new basis vectors
 
             for i in 1:k-1
                 alpha = vecdot(P[i],G[k])/M[i,i]
-                @blas! G[k] += -alpha*G[i]
-                @blas! U[k] += -alpha*U[i]
+                G[k] .-= alpha .* G[i]
+                U[k] .-= alpha .* U[i]
             end
 
             # New column of M = P'*G  (first k-1 entries are zero)
@@ -148,22 +142,17 @@ function idrs_method!(log::ConvergenceHistory, X, op, args, C::T,
             #  Make r orthogonal to q_i, i = 1..k
 
             beta = f[k]/M[k,k]
-            @blas! R += -beta*G[k]
-            @blas! X += beta*U[k]
+            R .-= beta .* G[k]
+            X .+= beta .* U[k]
 
             normR = vecnorm(R)
             if smoothing
-                # T_s = R_s - R
-                @blas! T_s = R_s
-                @blas! T_s += (-1.)*R
+                T_s .= R_s .- R
 
                 gamma = vecdot(R_s, T_s)/vecdot(T_s, T_s)
 
-                @blas! R_s += -gamma*T_s
-                # X_s = X_s - gamma*(X_s - X)
-                @blas! T_s = X_s
-                @blas! T_s += (-1.)*X
-                @blas! X_s += -gamma*T_s
+                R_s .-= gamma .* T_s
+                X_s .-= gamma .* (X_s .- X)
 
                 normR = vecnorm(R_s)
             end
@@ -182,34 +171,29 @@ function idrs_method!(log::ConvergenceHistory, X, op, args, C::T,
 
         # Now we have sufficient vectors in G_j to compute residual in G_j+1
         # Note: r is already perpendicular to P so v = r
-        @blas! V = R
+        copy!(V, R)
         Q = op(V, args...)::T
         om = omega(Q, R)
-        @blas! R += -om*Q
-        @blas! X += om*V
+        R .-= om .* Q
+        X .+= om .* V
 
         normR = vecnorm(R)
         if smoothing
-            # T_s = R_s - R
-            @blas! T_s = R_s
-            @blas! T_s += (-1.)*R
+            T_s .= R_s .- R
 
             gamma = vecdot(R_s, T_s)/vecdot(T_s, T_s)
 
-            @blas! R_s += -gamma*T_s
-            # X_s = X_s - gamma*(X_s - X)
-            @blas! T_s = X_s
-            @blas! T_s += (-1.)*X
-            @blas! X_s += -gamma*T_s
+            R_s .-= gamma .* T_s
+            X_s .-= gamma .* (X_s .- X)
 
             normR = vecnorm(R_s)
         end
         iter += 1
-        nextiter!(log,mvps=1)
+        nextiter!(log, mvps=1)
         push!(log, :resnorm, normR)
     end
     if smoothing
-        @blas! X = X_s
+        copy!(X, X_s)
     end
     verbose && @printf("\n")
     setconv(log, 0<=normR<tol)
diff --git a/src/lsmr.jl b/src/lsmr.jl
@@ -109,10 +109,10 @@ function lsmr_method!(log::ConvergenceHistory, x, A, b, v, h, hbar;
     # form the first vectors u and v (satisfy  β*u = b,  α*v = A'u)
     u = A_mul_B!(-1, A, x, 1, b)
     β = norm(u)
-    β > 0 && @blas! u *= inv(β)
+    u .*= inv(β)
     Ac_mul_B!(1, A, u, 0, v)
     α = norm(v)
-    α > 0 && @blas! v *= inv(α)
+    v .*= inv(α)
 
     log[:atol] = atol
     log[:btol] = btol
@@ -126,7 +126,7 @@ function lsmr_method!(log::ConvergenceHistory, x, A, b, v, h, hbar;
     cbar = one(Tr)
     sbar = zero(Tr)
 
-    @blas! h = v
+    copy!(h, v)
     fill!(hbar, zero(Tr))
 
     # Initialize variables for estimation of ||r||.
@@ -162,10 +162,10 @@ function lsmr_method!(log::ConvergenceHistory, x, A, b, v, h, hbar;
             β = norm(u)
             if β > 0
                 log.mtvps+=1
-                @blas! u *= inv(β)
+                u .*= inv(β)
                 Ac_mul_B!(1, A, u, -β, v)
                 α = norm(v)
-                α > 0 && @blas! v *= inv(α)
+                v .*= inv(α)
             end
 
             # Construct rotation Qhat_{k,2k+1}.
@@ -193,11 +193,9 @@ function lsmr_method!(log::ConvergenceHistory, x, A, b, v, h, hbar;
             ζbar = - sbar * ζbar
 
             # Update h, h_hat, x.
-            @blas! hbar *= - θbar * ρ / (ρold * ρbarold)
-            @blas! hbar += h
-            @blas! x += (ζ / (ρ * ρbar))*hbar
-            @blas! h *= - θnew / ρ
-            @blas! h += v
+            hbar .= hbar .* (-θbar * ρ / (ρold * ρbarold)) .+ h
+            x .+= (ζ / (ρ * ρbar)) * hbar
+            h .= h .* (-θnew / ρ) .+ v
 
             ##############################################################################
             ##
diff --git a/src/lsqr.jl b/src/lsqr.jl
@@ -125,12 +125,12 @@ function lsqr_method!(log::ConvergenceHistory, x, A, b;
     alpha = zero(Tr)
     if beta > 0
         log.mtvps=1
-        @blas! u *= inv(beta)
+        u .*= inv(beta)
         Ac_mul_B!(v,A,u)
         alpha = norm(v)
     end
     if alpha > 0
-        @blas! v *= inv(alpha)
+        v .*= inv(alpha)
     end
     w = copy(v)
     wrho = similar(w)
@@ -158,21 +158,19 @@ function lsqr_method!(log::ConvergenceHistory, x, A, b;
         # Note that the following three lines are a band aid for a GEMM: X: C := αAB + βC.
         # This is already supported in A_mul_B! for sparse and distributed matrices, but not yet dense
         A_mul_B!(tmpm, A, v)
-        @blas! u *= -alpha
-        @blas! u += one(eltype(tmpm))*tmpm
+        u .= -alpha .* u .+ tmpm
         beta = norm(u)
         if beta > 0
             log.mtvps+=1
-            @blas! u *= inv(beta)
+            u .*= inv(beta)
             Anorm = sqrt(abs2(Anorm) + abs2(alpha) + abs2(beta) + dampsq)
             # Note that the following three lines are a band aid for a GEMM: X: C := αA'B + βC.
             # This is already supported in Ac_mul_B! for sparse and distributed matrices, but not yet dense
             Ac_mul_B!(tmpn, A, u)
-            @blas! v *= -beta
-            @blas! v += one(eltype(tmpn))*tmpn
+            v .= -beta .* v .+ tmpn
             alpha  = norm(v)
             if alpha > 0
-                @blas! v *= inv(alpha)
+                v .*= inv(alpha)
             end
         end
 
@@ -199,11 +197,9 @@ function lsqr_method!(log::ConvergenceHistory, x, A, b;
         t1      =   phi  /rho
         t2      = - theta/rho
 
-        @blas! x += t1*w
-        @blas! w *= t2
-        @blas! w += one(t2)*v
-        @blas! wrho = w
-        @blas! wrho *= inv(rho)
+        x .+= t1*w
+        w = t2 .* w .+ v
+        wrho .= w .* inv(rho)
         ddnorm += norm(wrho)
 
         # Use a plane rotation on the right to eliminate the
diff --git a/src/orthogonalize.jl b/src/orthogonalize.jl
diff --git a/src/svdl.jl b/src/svdl.jl
diff --git a/test/lsmr.jl b/test/lsmr.jl