We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 78afd1a, commit 03c040d. Copy full SHA for 03c040d
src/host/linalg.jl
@@ -110,16 +110,11 @@ function generic_matmatmul!(C::AnyGPUArray{R}, A::AnyGPUArray{T}, B::AnyGPUArray
110
return fill!(C, zero(R))
111
end
112
113
- # reshape vectors to matrices
114
- A′ = reshape(A, (size(A,1), size(A,2)))
115
- B′ = reshape(B, (size(B,1), size(B,2)))
116
- C′= reshape(C, (size(C,1), size(C,2)))
117
-
118
- gpu_call(C′, A′, B′; name="matmatmul!") do ctx, C, A, B
+ gpu_call(C, A, B; name="matmatmul!") do ctx, C, A, B
119
idx = @linearidx C
120
- i, j = Tuple(CartesianIndices(C)[idx])
+ i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
121
122
- if i <= size(A,1) && j <= size(B,2)
+ @inbounds if i <= size(A,1) && j <= size(B,2)
123
z2 = zero(A[i, 1]*B[1, j] + A[i, 1]*B[1, j])
124
Ctmp = convert(promote_type(R, typeof(z2)), z2)
125
for k in 1:size(A,2)
0 commit comments