|
| 1 | +function Base.ctranspose{T}(D::DArray{T,2}) |
| 2 | + DArray(reverse(size(D)), procs(D)) do I |
| 3 | + lp = Array(T, map(length, I)) |
| 4 | + rp = convert(Array, D[reverse(I)...]) |
| 5 | + ctranspose!(lp, rp) |
| 6 | + end |
| 7 | +end |
| 8 | + |
| 9 | +function Base.transpose{T}(D::DArray{T,2}) |
| 10 | + DArray(reverse(size(D)), procs(D)) do I |
| 11 | + lp = Array(T, map(length, I)) |
| 12 | + rp = convert(Array, D[reverse(I)...]) |
| 13 | + transpose!(lp, rp) |
| 14 | + end |
| 15 | +end |
| 16 | + |
| 17 | +typealias DVector{T,A} DArray{T,1,A} |
| 18 | +typealias DMatrix{T,A} DArray{T,2,A} |
| 19 | + |
| 20 | +# Level 1 |
| 21 | + |
| 22 | +function axpy!(α, x::DVector, y::DVector) |
| 23 | + if length(x) != length(y) |
| 24 | + throw(DimensionMismatch("vectors must have same length")) |
| 25 | + end |
| 26 | + @sync for p in procs(y) |
| 27 | + @async remotecall_fetch(() -> (Base.axpy!(α, localpart(x), localpart(y)); nothing), p) |
| 28 | + end |
| 29 | + return y |
| 30 | +end |
| 31 | + |
| 32 | +function dot(x::DVector, y::DVector) |
| 33 | + if length(x) != length(y) |
| 34 | + throw(DimensionMismatch("")) |
| 35 | + end |
| 36 | + if (procs(x) != procs(y)) || (x.cuts != y.cuts) |
| 37 | + throw(ArgumentError("vectors don't have the same distribution. Not handled for efficiency reasons.")) |
| 38 | + end |
| 39 | + |
| 40 | + results=Any[] |
| 41 | + @sync begin |
| 42 | + for i = eachindex(x.pids) |
| 43 | + @async push!(results, remotecall_fetch((x, y, i) -> dot(localpart(x), fetch(y, i)), x.pids[i], x, y, i)) |
| 44 | + end |
| 45 | + end |
| 46 | + return reduce(@functorize(+), results) |
| 47 | +end |
| 48 | + |
| 49 | +function norm(x::DVector, p::Real = 2) |
| 50 | + results = [] |
| 51 | + @sync begin |
| 52 | + for pp in procs(x) |
| 53 | + @async push!(results, remotecall_fetch(() -> norm(localpart(x), p), pp)) |
| 54 | + end |
| 55 | + end |
| 56 | + return norm(results, p) |
| 57 | +end |
| 58 | + |
| 59 | +Base.scale!(A::DArray, x::Number) = begin |
| 60 | + @sync for p in procs(A) |
| 61 | + @async remotecall_fetch((A,x)->(scale!(localpart(A), x); nothing), p, A, x) |
| 62 | + end |
| 63 | + return A |
| 64 | +end |
| 65 | + |
| 66 | +# Level 2 |
| 67 | +function add!(dest, src, scale = one(dest[1])) |
| 68 | + if length(dest) != length(src) |
| 69 | + throw(DimensionMismatch("source and destination arrays must have same number of elements")) |
| 70 | + end |
| 71 | + if scale == one(scale) |
| 72 | + @simd for i = eachindex(dest) |
| 73 | + @inbounds dest[i] += src[i] |
| 74 | + end |
| 75 | + else |
| 76 | + @simd for i = eachindex(dest) |
| 77 | + @inbounds dest[i] += scale*src[i] |
| 78 | + end |
| 79 | + end |
| 80 | + return dest |
| 81 | +end |
| 82 | + |
| 83 | +function A_mul_B!(α::Number, A::DMatrix, x::AbstractVector, β::Number, y::DVector) |
| 84 | + |
| 85 | + # error checks |
| 86 | + if size(A, 2) != length(x) |
| 87 | + throw(DimensionMismatch("")) |
| 88 | + end |
| 89 | + if y.cuts[1] != A.cuts[1] |
| 90 | + throw(ArgumentError("cuts of output vector must match cuts of first dimension of matrix")) |
| 91 | + end |
| 92 | + |
| 93 | + # Multiply on each tile of A |
| 94 | + R = Array(Future, size(A.pids)...) |
| 95 | + for j = 1:size(A.pids, 2) |
| 96 | + xj = x[A.cuts[2][j]:A.cuts[2][j + 1] - 1] |
| 97 | + for i = 1:size(A.pids, 1) |
| 98 | + R[i,j] = remotecall(procs(A)[i,j]) do |
| 99 | + localpart(A)*convert(localtype(x), xj) |
| 100 | + end |
| 101 | + end |
| 102 | + end |
| 103 | + |
| 104 | + # Scale y if necessary |
| 105 | + if β != one(β) |
| 106 | + @sync for p in y.pids |
| 107 | + if β != zero(β) |
| 108 | + @async remotecall_fetch(y -> (scale!(localpart(y), β); nothing), p, y) |
| 109 | + else |
| 110 | + @async remotecall_fetch(y -> (fill!(localpart(y), 0); nothing), p, y) |
| 111 | + end |
| 112 | + end |
| 113 | + end |
| 114 | + |
| 115 | + # Update y |
| 116 | + @sync for i = 1:size(R, 1) |
| 117 | + p = y.pids[i] |
| 118 | + for j = 1:size(R, 2) |
| 119 | + rij = R[i,j] |
| 120 | + @async remotecall_fetch(() -> (add!(localpart(y), fetch(rij), α); nothing), p) |
| 121 | + end |
| 122 | + end |
| 123 | + |
| 124 | + return y |
| 125 | +end |
| 126 | + |
| 127 | +function Ac_mul_B!(α::Number, A::DMatrix, x::AbstractVector, β::Number, y::DVector) |
| 128 | + |
| 129 | + # error checks |
| 130 | + if size(A, 1) != length(x) |
| 131 | + throw(DimensionMismatch("")) |
| 132 | + end |
| 133 | + if y.cuts[1] != A.cuts[2] |
| 134 | + throw(ArgumentError("cuts of output vector must match cuts of second dimension of matrix")) |
| 135 | + end |
| 136 | + |
| 137 | + # Multiply on each tile of A |
| 138 | + R = Array(Future, reverse(size(A.pids))...) |
| 139 | + for j = 1:size(A.pids, 1) |
| 140 | + xj = x[A.cuts[1][j]:A.cuts[1][j + 1] - 1] |
| 141 | + for i = 1:size(A.pids, 2) |
| 142 | + R[i,j] = remotecall(() -> localpart(A)'*convert(localtype(x), xj), procs(A)[j,i]) |
| 143 | + end |
| 144 | + end |
| 145 | + |
| 146 | + # Scale y if necessary |
| 147 | + if β != one(β) |
| 148 | + @sync for p in y.pids |
| 149 | + if β != zero(β) |
| 150 | + @async remotecall_fetch(() -> (scale!(localpart(y), β); nothing), p) |
| 151 | + else |
| 152 | + @async remotecall_fetch(() -> (fill!(localpart(y), 0); nothing), p) |
| 153 | + end |
| 154 | + end |
| 155 | + end |
| 156 | + |
| 157 | + # Update y |
| 158 | + @sync for i = 1:size(R, 1) |
| 159 | + p = y.pids[i] |
| 160 | + for j = 1:size(R, 2) |
| 161 | + rij = R[i,j] |
| 162 | + @async remotecall_fetch(() -> (add!(localpart(y), fetch(rij), α); nothing), p) |
| 163 | + end |
| 164 | + end |
| 165 | + return y |
| 166 | +end |
| 167 | + |
| 168 | + |
| 169 | +# Level 3 |
| 170 | +function _matmatmul!(α::Number, A::DMatrix, B::AbstractMatrix, β::Number, C::DMatrix, tA) |
| 171 | + # error checks |
| 172 | + Ad1, Ad2 = (tA == 'N') ? (1,2) : (2,1) |
| 173 | + mA, nA = size(A, Ad1, Ad2) |
| 174 | + mB, nB = size(B) |
| 175 | + if mB != nA |
| 176 | + throw(DimensionMismatch("matrix A has dimensions ($mA, $nA), matrix B has dimensions ($mB, $nB)")) |
| 177 | + end |
| 178 | + if size(C,1) != mA || size(C,2) != nB |
| 179 | + throw(DimensionMismatch("result C has dimensions $(size(C)), needs ($mA, $nB)")) |
| 180 | + end |
| 181 | + if C.cuts[1] != A.cuts[Ad1] |
| 182 | + throw(ArgumentError("cuts of the first dimension of the output matrix must match cuts of dimension $Ad1 of the first input matrix")) |
| 183 | + end |
| 184 | + |
| 185 | + # Multiply on each tile of A |
| 186 | + if tA == 'N' |
| 187 | + R = Array(Future, size(procs(A))..., size(procs(C), 2)) |
| 188 | + else |
| 189 | + R = Array(Future, reverse(size(procs(A)))..., size(procs(C), 2)) |
| 190 | + end |
| 191 | + for j = 1:size(A.pids, Ad2) |
| 192 | + for k = 1:size(C.pids, 2) |
| 193 | + Acuts = A.cuts[Ad2] |
| 194 | + Ccuts = C.cuts[2] |
| 195 | + Bjk = B[Acuts[j]:Acuts[j + 1] - 1, Ccuts[k]:Ccuts[k + 1] - 1] |
| 196 | + for i = 1:size(A.pids, Ad1) |
| 197 | + p = (tA == 'N') ? procs(A)[i,j] : procs(A)[j,i] |
| 198 | + R[i,j,k] = remotecall(p) do |
| 199 | + if tA == 'T' |
| 200 | + return localpart(A).'*convert(localtype(B), Bjk) |
| 201 | + elseif tA == 'C' |
| 202 | + return localpart(A)'*convert(localtype(B), Bjk) |
| 203 | + else |
| 204 | + return localpart(A)*convert(localtype(B), Bjk) |
| 205 | + end |
| 206 | + end |
| 207 | + end |
| 208 | + end |
| 209 | + end |
| 210 | + |
| 211 | + # Scale C if necessary |
| 212 | + if β != one(β) |
| 213 | + @sync for p in C.pids |
| 214 | + if β != zero(β) |
| 215 | + @async remotecall_fetch(() -> (scale!(localpart(C), β); nothing), p) |
| 216 | + else |
| 217 | + @async remotecall_fetch(() -> (fill!(localpart(C), 0); nothing), p) |
| 218 | + end |
| 219 | + end |
| 220 | + end |
| 221 | + |
| 222 | + # Update C |
| 223 | + @sync for i = 1:size(R, 1) |
| 224 | + for k = 1:size(C.pids, 2) |
| 225 | + p = C.pids[i,k] |
| 226 | + for j = 1:size(R, 2) |
| 227 | + rijk = R[i,j,k] |
| 228 | + @async remotecall_fetch(d -> (add!(localpart(d), fetch(rijk), α); nothing), p, C) |
| 229 | + end |
| 230 | + end |
| 231 | + end |
| 232 | + return C |
| 233 | +end |
| 234 | + |
| 235 | +A_mul_B!(α::Number, A::DMatrix, B::AbstractMatrix, β::Number, C::DMatrix) = _matmatmul!(α, A, B, β, C, 'N') |
| 236 | +Ac_mul_B!(α::Number, A::DMatrix, B::AbstractMatrix, β::Number, C::DMatrix) = _matmatmul!(α, A, B, β, C, 'C') |
| 237 | +At_mul_B!(α::Number, A::DMatrix, B::AbstractMatrix, β::Number, C::DMatrix) = _matmatmul!(α, A, B, β, C, 'T') |
| 238 | +At_mul_B!(C::DMatrix, A::DMatrix, B::AbstractMatrix) = At_mul_B!(one(eltype(C)), A, B, zero(eltype(C)), C) |
| 239 | + |
| 240 | +function (*)(A::DMatrix, x::AbstractVector) |
| 241 | + T = promote_type(Base.LinAlg.arithtype(eltype(A)), Base.LinAlg.arithtype(eltype(x))) |
| 242 | + y = DArray(I -> Array(T, map(length, I)), (size(A, 1),), procs(A)[:,1], (size(procs(A), 1),)) |
| 243 | + return A_mul_B!(one(T), A, x, zero(T), y) |
| 244 | +end |
| 245 | +function (*)(A::DMatrix, B::AbstractMatrix) |
| 246 | + T = promote_type(Base.LinAlg.arithtype(eltype(A)), Base.LinAlg.arithtype(eltype(B))) |
| 247 | + C = DArray(I -> Array(T, map(length, I)), (size(A, 1), size(B, 2)), procs(A)[:,1:min(size(procs(A), 2), size(procs(B), 2))], (size(procs(A), 1), min(size(procs(A), 2), size(procs(B), 2)))) |
| 248 | + return A_mul_B!(one(T), A, B, zero(T), C) |
| 249 | +end |
| 250 | + |
| 251 | +function Ac_mul_B(A::DMatrix, x::AbstractVector) |
| 252 | + T = promote_type(Base.LinAlg.arithtype(eltype(A)), Base.LinAlg.arithtype(eltype(x))) |
| 253 | + y = DArray(I -> Array(T, map(length, I)), (size(A, 2),), procs(A)[1,:], (size(procs(A), 2),)) |
| 254 | + return Ac_mul_B!(one(T), A, x, zero(T), y) |
| 255 | +end |
| 256 | +function Ac_mul_B(A::DMatrix, B::AbstractMatrix) |
| 257 | + T = promote_type(Base.LinAlg.arithtype(eltype(A)), Base.LinAlg.arithtype(eltype(B))) |
| 258 | + C = DArray(I -> Array(T, map(length, I)), (size(A, 2), size(B, 2)), procs(A)[1:min(size(procs(A), 1), size(procs(B), 2)),:], (size(procs(A), 2), min(size(procs(A), 1), size(procs(B), 2)))) |
| 259 | + return Ac_mul_B!(one(T), A, B, zero(T), C) |
| 260 | +end |
0 commit comments