rename julia -> threaded

SimonDanisch · SimonDanisch · commit 5c85f8aab315 · 2017-08-31T12:20:17.000+02:00
diff --git a/deps/build.jl b/deps/build.jl
@@ -2,9 +2,9 @@ info("""
 This process will figure out which acceleration Packages you have installed
 and therefore which backends GPUArrays can offer.
 Theoretically available:
-:cudanative, :julia, :opencl
+:cudanative, :threaded, :opencl
 
-:julia is the default backend, which should always work.
+:threaded is the default backend, which should always work.
 Just start Julia with:
 `JULIA_NUM_THREADS=8 julia -O3` to get it some threads.
 8 is just an example and should be chosen depending on the processor you have.
@@ -13,7 +13,7 @@ acceleration, you might as well want optimization level 3!
 In the future, OpenCL, CUDA and OpenGL will be added as another backend.
 """)
 
-supported_backends = [:julia]
+supported_backends = [:threaded]
 
 cudanative_dir = get(ENV, "CUDANATIVE_PATH", Pkg.dir("CUDAnative"))
 install_cudanative = true
@@ -41,7 +41,8 @@ if !isdir(cudanative_dir)
 end
 
 # Julia will always be available
-info("julia added as a backend.")
+info("threaded backend added.")
+
 test_kernel() = nothing
 try
     using CUDAnative, CUDAdrv
diff --git a/src/backends/backends.jl b/src/backends/backends.jl
@@ -56,10 +56,6 @@ end
 
 ################################
 # Device selection functions for e.g. devices(filterfuncs)
-is_gpu(ctx::Context) = is_gpu(ctx.device)
-is_cpu(ctx::Context) = is_cpu(ctx.device)
-has_atleast(ctx::Context, attribute, value) = has_atleast(ctx.device, attribute, value)
-
 # Generic fallbacks for device filtering: an arbitrary device counts as
 # neither a GPU nor a CPU unless a more specific method is defined for
 # its device type.
 is_gpu(device) = false
 is_cpu(device) = false
 # Filter predicate: `attribute` is called on `device` and the result must be
 # at least `value`. The earlier body called `attribute(ctx_or_device)`, a name
 # that is not bound in this method, so a call failed with an UndefVarError
 # unless a global of that name happened to exist.
 has_atleast(device, attribute, value) = attribute(device) >= value
@@ -70,7 +66,7 @@ has_atleast(device, attribute, value) = attribute(ctx_or_device) >= value
 # Works for context objects as well but is overloaded in the backends
 is_opencl(ctx::Symbol) = ctx == :opencl
 is_cudanative(ctx::Symbol) =  ctx == :cudanative
-is_julia(ctx::Symbol) =  ctx == :julia
+is_julia(ctx::Symbol) =  ctx == :threaded # NOTE(review): predicate is still named is_julia but now matches the :threaded symbol
 is_opengl(ctx::Symbol) =  ctx == :opengl
 
 is_opencl(ctx) = false
@@ -79,6 +75,12 @@ is_julia(ctx) = false
 is_opengl(ctx) = false
 
 
+opencl(filterfuncs...) = init(:opencl, filterfuncs...) # shorthand: forwards the filter functions to init with the :opencl symbol
+cudanative(filterfuncs...) = init(:cudanative, filterfuncs...) # shorthand: same forwarding with the :cudanative symbol
+threaded(filterfuncs...) = init(:threaded, filterfuncs...) # shorthand: same forwarding with the :threaded symbol
+
+export opencl, cudanative, threaded
+
 """
 Creates a new context from `device` without caching the resulting context.
 """
@@ -95,7 +97,7 @@ end
 
 function backend_module(sym::Symbol)
     if sym in supported_backends()
-        if sym == :julia
+        if sym == :threaded
             JLBackend
         elseif sym == :cudanative
             CUBackend
@@ -109,13 +111,16 @@ function backend_module(sym::Symbol)
     end
 end
 function init(sym::Symbol, args...; kw_args...)
-    mod = backend_module(sym)
+    init(backend_module(sym), args...; kw_args...) # resolve sym to its backend module and forward to the Module method
+end
+function init(mod::Module, args...; kw_args...) # makes mod the current backend, then initialises with the remaining arguments
     setbackend!(mod)
     init(args...; kw_args...)
 end
 
 function init(filterfuncs::Function...; kw_args...)
     devices = available_devices(filterfuncs...)
+    devices = sort(devices, by = is_gpu) # prioritize gpu devices; NOTE(review): an ascending sort on a Bool key puts is_gpu == false devices first, confirm the device picked afterwards is the intended one
     if isempty(devices)
         error("No device found for: $(join(string.(filterfuncs), " "))")
     end
@@ -140,7 +145,7 @@ current_context() = current_backend().current_context()
 
 """
 Sets the current backend to be used globally. Accepts the symbols:
-:cudanative, :opencl, :julia.
+:cudanative, :opencl, :threaded.
 """
 function setbackend!(backend::Symbol)
     setbackend!(backend_module(backend))
diff --git a/src/backends/cudanative/cudanative.jl b/src/backends/cudanative/cudanative.jl
@@ -5,7 +5,7 @@ using ..GPUArrays, CUDAnative, StaticArrays
 import CUDAdrv, CUDArt #, CUFFT
 
 import GPUArrays: buffer, create_buffer, acc_mapreduce, is_cudanative
-import GPUArrays: Context, GPUArray, context, linear_index, gpu_call
+import GPUArrays: Context, GPUArray, context, linear_index, gpu_call, free_global_memory
 import GPUArrays: blas_module, blasbuffer, is_blas_supported, hasblas, init
 import GPUArrays: default_buffer_type, broadcast_index, is_fft_supported, unsafe_reinterpret
 import GPUArrays: is_gpu, name, threads, blocks, global_memory, local_memory, new_context
@@ -32,7 +32,7 @@ end
 
 devices() = CUDAdrv.devices()
 is_gpu(dev::CUDAdrv.CuDevice) = true
-name(dev::CUDAdrv.CuDevice) = CUDAdrv.name(dev)
+name(dev::CUDAdrv.CuDevice) = string("CU ", CUDAdrv.name(dev)) # prepends "CU " to the name reported by CUDAdrv
 threads(dev::CUDAdrv.CuDevice) = CUDAdrv.attribute(dev, CUDAdrv.MAX_THREADS_PER_BLOCK)
 
 function blocks(dev::CUDAdrv.CuDevice)
@@ -43,6 +43,7 @@ function blocks(dev::CUDAdrv.CuDevice)
     )
 end
 
+free_global_memory(dev::CUDAdrv.CuDevice) = CUDAdrv.Mem.info()[1] # NOTE(review): dev is not used here; the first element of Mem.info() is presumably the free byte count of the currently active context, confirm
 global_memory(dev::CUDAdrv.CuDevice) = CUDAdrv.totalmem(dev)
 local_memory(dev::CUDAdrv.CuDevice) = CUDAdrv.attribute(dev, CUDAdrv.TOTAL_CONSTANT_MEMORY)
 
@@ -71,6 +72,7 @@ let contexts = Dict{CUDAdrv.CuDevice, CUContext}(), active_device = CUDAdrv.CuDe
     end
 
     function GPUArrays.init(dev::CUDAdrv.CuDevice)
+        GPUArrays.setbackend!(CUBackend)
         if isempty(active_device)
             push!(active_device, dev)
         else
diff --git a/src/backends/opencl/opencl.jl b/src/backends/opencl/opencl.jl
@@ -42,7 +42,7 @@ devices() = cl.devices()
 is_gpu(dev::cl.Device) = cl.info(dev, :device_type) == :gpu
 is_cpu(dev::cl.Device) = cl.info(dev, :device_type) == :cpu
 
-name(dev::cl.Device) = cl.info(dev, :name)
+name(dev::cl.Device) = string("CL ", cl.info(dev, :name)) # prepends "CL " to the name reported by cl.info
 
 threads(dev::cl.Device) = cl.info(dev, :max_work_group_size) |> Int
 blocks(dev::cl.Device) = cl.info(dev, :max_work_item_size)
@@ -67,6 +67,7 @@ let contexts = Dict{cl.Device, CLContext}(), active_device = cl.Device[]
         end
     end
     function GPUArrays.init(dev::cl.Device)
+        GPUArrays.setbackend!(CLBackend)
         if isempty(active_device)
             push!(active_device, dev)
         else
diff --git a/src/backends/threaded/fft.jl b/src/backends/threaded/fft.jl
diff --git a/src/backends/threaded/threaded.jl b/src/backends/threaded/threaded.jl
@@ -32,6 +32,7 @@ let contexts = Dict{JLDevice, JLContext}(), active_device = JLDevice[]
     end
     current_context() = contexts[current_device()]
     function GPUArrays.init(dev::JLDevice)
+        GPUArrays.setbackend!(JLBackend)
         if isempty(active_device)
             push!(active_device, dev)
         else
@@ -183,7 +184,7 @@ linear_index(A::AbstractArray, state) = state
         return
     end
 end
-function gpu_call(f, A::JLArray, args, globalsize = length(A), local_size = 0)
+function gpu_call(f, A::JLArray, args::Tuple, globalsize = length(A), local_size = 0)
     unpacked_args = unpack_buffer.(args)
     n = nthreads(A)
     len = prod(globalsize)
diff --git a/src/broadcast.jl b/src/broadcast.jl
@@ -52,6 +52,9 @@ end
     _broadcast!(f, C, keeps, Idefaults, A, Bs, Val{N}, iter)
     return C
 end
+function broadcast_t(f::Any, ::Type{Any}, ::Any, ::Any, A::GPUArrays.AbstractAccArray, args::Vararg{Any,N}) where N # method selected when the second argument is exactly the type Any
+    error("Return type couldn't be inferred for broadcast. Func: $f, $(typeof(A)), $args") # always throws; the message reports f, the type of A and the remaining args
+end
 
 function _broadcast!(
         func, out::AbstractAccArray,
@@ -67,6 +70,8 @@ function _broadcast!(
     out
 end
 
+
+
 function Base.foreach(func, over::AbstractAccArray, Bs...)
     shape = Cuint.(size(over))
     keeps, Idefaults = map_newindexer(shape, over, Bs)
diff --git a/test/cudanative.jl b/test/cudanative.jl
@@ -1,7 +1,8 @@
 using GPUArrays
 using GPUArrays: free
 using CUDAnative, Base.Test
-cuctx = GPUArrays.init(:cudanative)
+
+cuctx = cudanative()
 const cu = CUDAnative
 
 # more complex function for broadcast
diff --git a/test/opencl.jl b/test/opencl.jl
@@ -1,7 +1,8 @@
 using Base.Test
 using GPUArrays
 using GPUArrays: free
-ctx = CLBackend.init()
+
+ctx = opencl()
 
 # more complex function for broadcast
 function test{T}(a::T, b)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,5 +1,3 @@
-#TODO remove before merging
-Pkg.checkout("Transpiler")
 using GPUArrays
 using Base.Test
 srand(42) # set random seed for reproducibility
@@ -9,6 +7,8 @@ function jltest(a, b)
     y*10f0
 end
 
+
+
 function log_gpu_mem()
     if :cudanative in supported_backends()
         info("GPUMem: ", CUDAdrv.Mem.used() / 10^6)
@@ -17,13 +17,13 @@ function log_gpu_mem()
     end
 end
 
-macro allbackends(title, backendname::Symbol, block)
+macro allbackends(title, ctxname::Symbol, block)
     quote
         for device in GPUArrays.all_devices()
             dname = GPUArrays.name(device)
             @testset "$($(esc(title))) $dname" begin
                 ctx = GPUArrays.init(device)
-                $(esc(backendname)) = ctx
+                $(esc(ctxname)) = ctx
                 $(esc(block))
             end
             log_gpu_mem()
@@ -35,9 +35,10 @@ end
     include("broadcast.jl")
 end
 
+
 # Only test supported backends!
 for backend in supported_backends()
-    if backend in (:opencl, :julia, :cudanative)
+    if backend in (:opencl, :threaded, :cudanative)
         @testset "$backend" begin
             include("$(backend).jl")
         end
@@ -63,6 +64,3 @@ log_gpu_mem()
     include("fft.jl")
 end
 log_gpu_mem()
-
-
-using GPUArrays
diff --git a/test/threaded.jl b/test/threaded.jl
@@ -1,7 +1,7 @@
 using GPUArrays
 using Base.Test
 
-GPUArrays.init(:julia)
+ctx = threaded()