diff --git a/src/host/linalg.jl b/src/host/linalg.jl
index bdfab6516..4933839c6 100644
--- a/src/host/linalg.jl
+++ b/src/host/linalg.jl
@@ -687,8 +687,8 @@ function LinearAlgebra.rotate!(x::AbstractGPUArray, y::AbstractGPUArray, c::Numb
         i = @index(Global, Linear)
         @inbounds xi = x[i]
         @inbounds yi = y[i]
-        @inbounds x[i] =       c  * xi + s * yi
-        @inbounds y[i] = -conj(s) * xi + c * yi
+        @inbounds x[i] = s*yi +      c *xi
+        @inbounds y[i] = c*yi - conj(s)*xi 
     end
     rotate_kernel!(get_backend(x))(x, y, c, s; ndrange = size(x))
     return x, y
diff --git a/test/testsuite.jl b/test/testsuite.jl
index df68f31af..b48d7ccd6 100644
--- a/test/testsuite.jl
+++ b/test/testsuite.jl
@@ -46,7 +46,7 @@ function compare(f, AT::Type{<:AbstractGPUArray}, xs...; kwargs...)
 end
 
 function compare(f, AT::Type{<:Array}, xs...; kwargs...)
-    # no need to actually run this tests: we have nothing to compoare against,
+    # no need to actually run this test: we have nothing to compare against,
     # and we'll run it on a CPU array anyhow when comparing to a GPU array.
     #
     # this method exists so that we can at least run the test suite with Array,
@@ -67,6 +67,8 @@ isrealtype(T) = T <: Real
 iscomplextype(T) = T <: Complex
 isrealfloattype(T) = T <: AbstractFloat
 isfloattype(T) = T <: AbstractFloat || T <: Complex{<:AbstractFloat}
+NaN_T(::Type{T}) where {T<:AbstractFloat} = T(NaN) # NaN of the real float type T
+NaN_T(::Type{T}) where {T<:Complex{<:AbstractFloat}} = T(NaN, NaN) # NaN in both real and imaginary parts
 
 # list of tests
 const tests = Dict()
diff --git a/test/testsuite/linalg.jl b/test/testsuite/linalg.jl
index 914b06b2c..c17868f81 100644
--- a/test/testsuite/linalg.jl
+++ b/test/testsuite/linalg.jl
@@ -391,3 +391,36 @@ end
         @test isrealfloattype(typeof(opnorm(AT(mat), p)))
     end
 end
+
+@testsuite "linalg/NaN_false" (AT, eltypes)->begin
+    # `false` is a strong zero in Julia (`false * NaN == 0.0`), so scaling by it must clear NaNs.
+    floattypes = filter(isfloattype, eltypes) # NaN only exists for floating-point eltypes
+    nans(T, dims...) = adapt(AT, fill(NaN_T(T), dims...)) # all-NaN array of eltype T, adapted to AT
+    GA = AbstractGPUArray # shorthand for the `invoke` signatures below
+
+    if AT <: GA
+        @testset "rmul! / lmul!" for T in floattypes
+            y = invoke(rmul!, Tuple{GA, Number}, nans(T, 3), false)
+            @test all(!isnan, collect(y))
+            y = invoke(lmul!, Tuple{Number, GA}, false, nans(T, 3))
+            @test all(!isnan, collect(y))
+        end
+        @testset "axp{b}y!" for T in floattypes
+            y = invoke(axpby!, Tuple{Number, GA, Number, GA}, false, nans(T, 3), false, nans(T, 3))
+            @test all(!isnan, collect(y))
+            y = invoke(axpy!, Tuple{Number, GA, GA}, false, nans(T, 3), adapt(AT, rand(T, 3)))
+            @test all(!isnan, collect(y))
+        end
+        @testset "rotate! / reflect!" for T in floattypes
+            for op! in (rotate!, reflect!) # same signature and same expectation for both
+                x, y = invoke(op!, Tuple{GA, GA, Number, Number}, nans(T, 3), nans(T, 3), false, false)
+                @test all(!isnan, collect(x))
+                @test all(!isnan, collect(y))
+            end
+        end
+        @testset "generic_matmatmul!" for T in floattypes
+            y = invoke(GPUArrays.generic_matmatmul!, Tuple{AbstractArray, AbstractArray, AbstractArray, Number, Number}, nans(T, 3, 3), nans(T, 3, 3), nans(T, 3, 3), false, false)
+            @test all(!isnan, collect(y))
+        end
+    end
+end