Skip to content

Commit 5c85f8a

Browse files
committed
rename julia -> threaded
1 parent 560c634 commit 5c85f8a

File tree

11 files changed

+42
-27
lines changed

11 files changed

+42
-27
lines changed

deps/build.jl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ info("""
22
This process will figure out which acceleration Packages you have installed
33
and therefore which backends GPUArrays can offer.
44
Theoretically available:
5-
:cudanative, :julia, :opencl
5+
:cudanative, :threaded, :opencl
66
7-
:julia is the default backend, which should always work.
7+
:threaded is the default backend, which should always work.
88
Just start Julia with:
99
`JULIA_NUM_THREADS=8 julia -O3` to get it some threads.
1010
8 is just an example and should be chosen depending on the processor you have.
@@ -13,7 +13,7 @@ acceleration, you might as well want optimization level 3!
1313
In the future, OpenCL, CUDA and OpenGL will be added as another backend.
1414
""")
1515

16-
supported_backends = [:julia]
16+
supported_backends = [:threaded]
1717

1818
cudanative_dir = get(ENV, "CUDANATIVE_PATH", Pkg.dir("CUDAnative"))
1919
install_cudanative = true
@@ -41,7 +41,8 @@ if !isdir(cudanative_dir)
4141
end
4242

4343
# Julia will always be available
44-
info("julia added as a backend.")
44+
info("threaded backend added.")
45+
4546
test_kernel() = nothing
4647
try
4748
using CUDAnative, CUDAdrv

src/backends/backends.jl

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,6 @@ end
5656

5757
################################
5858
# Device selection functions for e.g. devices(filterfuncs)
59-
is_gpu(ctx::Context) = is_gpu(ctx.device)
60-
is_cpu(ctx::Context) = is_cpu(ctx.device)
61-
has_atleast(ctx::Context, attribute, value) = has_atleast(ctx.device, attribute, value)
62-
6359
# Generic fallback: a device is not considered a GPU unless a more specific
# method (defined by a backend for its own device type) says otherwise.
function is_gpu(device)
    return false
end
6460
# Generic fallback: a device is not considered a CPU unless a more specific
# method (defined by a backend for its own device type) says otherwise.
function is_cpu(device)
    return false
end
6561
# Device filter: `attribute` is a function that is applied to `device`; the
# result is `true` when the queried value is at least `value`.
# (The previous body referenced the undefined name `ctx_or_device`, so every
# call failed with an UndefVarError.)
has_atleast(device, attribute, value) = attribute(device) >= value
@@ -70,7 +66,7 @@ has_atleast(device, attribute, value) = attribute(ctx_or_device) >= value
7066
# Works for context objects as well but is overloaded in the backends
7167
# Backend-symbol predicate: true only for `:opencl`.
function is_opencl(ctx::Symbol)
    return ctx === :opencl
end
7268
# Backend-symbol predicate: true only for `:cudanative`.
function is_cudanative(ctx::Symbol)
    return ctx === :cudanative
end
73-
is_julia(ctx::Symbol) = ctx == :julia
69+
# Backend-symbol predicate for the pure-Julia backend. The function keeps its
# `is_julia` name, but the symbol it matches is `:threaded`.
function is_julia(ctx::Symbol)
    return ctx === :threaded
end
7470
# Backend-symbol predicate: true only for `:opengl`.
function is_opengl(ctx::Symbol)
    return ctx === :opengl
end
7571

7672
# Untyped fallback: any argument without a more specific method is not OpenCL.
function is_opencl(ctx)
    return false
end
@@ -79,6 +75,12 @@ is_julia(ctx) = false
7975
# Untyped fallback: any argument without a more specific method is not OpenGL.
function is_opengl(ctx)
    return false
end
8076

8177

78+
# Convenience entry point: forwards to `init` with the `:opencl` backend
# symbol, passing any device filter functions through unchanged.
function opencl(filterfuncs...)
    return init(:opencl, filterfuncs...)
end
79+
# Convenience entry point: forwards to `init` with the `:cudanative` backend
# symbol, passing any device filter functions through unchanged.
function cudanative(filterfuncs...)
    return init(:cudanative, filterfuncs...)
end
80+
# Convenience entry point: forwards to `init` with the `:threaded` backend
# symbol, passing any device filter functions through unchanged.
function threaded(filterfuncs...)
    return init(:threaded, filterfuncs...)
end
81+
82+
export opencl, cudanative, threaded
83+
8284
"""
8385
Creates a new context from `device` without caching the resulting context.
8486
"""
@@ -95,7 +97,7 @@ end
9597

9698
function backend_module(sym::Symbol)
9799
if sym in supported_backends()
98-
if sym == :julia
100+
if sym == :threaded
99101
JLBackend
100102
elseif sym == :cudanative
101103
CUBackend
@@ -109,13 +111,16 @@ function backend_module(sym::Symbol)
109111
end
110112
end
111113
function init(sym::Symbol, args...; kw_args...)
112-
mod = backend_module(sym)
114+
init(backend_module(sym), args...; kw_args...)
115+
end
116+
function init(mod::Module, args...; kw_args...)
113117
setbackend!(mod)
114118
init(args...; kw_args...)
115119
end
116120

117121
function init(filterfuncs::Function...; kw_args...)
118122
devices = available_devices(filterfuncs...)
123+
devices = sort(devices, by = is_gpu) # prioritize gpu devices
119124
if isempty(devices)
120125
error("No device found for: $(join(string.(filterfuncs), " "))")
121126
end
@@ -140,7 +145,7 @@ current_context() = current_backend().current_context()
140145

141146
"""
142147
Sets the current backend to be used globally. Accepts the symbols:
143-
:cudanative, :opencl, :julia.
148+
:cudanative, :opencl, :threaded.
144149
"""
145150
function setbackend!(backend::Symbol)
146151
setbackend!(backend_module(backend))

src/backends/cudanative/cudanative.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ using ..GPUArrays, CUDAnative, StaticArrays
55
import CUDAdrv, CUDArt #, CUFFT
66

77
import GPUArrays: buffer, create_buffer, acc_mapreduce, is_cudanative
8-
import GPUArrays: Context, GPUArray, context, linear_index, gpu_call
8+
import GPUArrays: Context, GPUArray, context, linear_index, gpu_call, free_global_memory
99
import GPUArrays: blas_module, blasbuffer, is_blas_supported, hasblas, init
1010
import GPUArrays: default_buffer_type, broadcast_index, is_fft_supported, unsafe_reinterpret
1111
import GPUArrays: is_gpu, name, threads, blocks, global_memory, local_memory, new_context
@@ -32,7 +32,7 @@ end
3232

3333
devices() = CUDAdrv.devices()
3434
is_gpu(dev::CUDAdrv.CuDevice) = true
35-
name(dev::CUDAdrv.CuDevice) = CUDAdrv.name(dev)
35+
name(dev::CUDAdrv.CuDevice) = string("CU ", CUDAdrv.name(dev))
3636
threads(dev::CUDAdrv.CuDevice) = CUDAdrv.attribute(dev, CUDAdrv.MAX_THREADS_PER_BLOCK)
3737

3838
function blocks(dev::CUDAdrv.CuDevice)
@@ -43,6 +43,7 @@ function blocks(dev::CUDAdrv.CuDevice)
4343
)
4444
end
4545

46+
# Returns the first element of `CUDAdrv.Mem.info()`.
# NOTE(review): `dev` is only used for dispatch and is not passed to
# `Mem.info()`; presumably the value describes whichever device/context is
# currently active rather than `dev` specifically — confirm.
free_global_memory(dev::CUDAdrv.CuDevice) = CUDAdrv.Mem.info()[1]
4647
global_memory(dev::CUDAdrv.CuDevice) = CUDAdrv.totalmem(dev)
4748
local_memory(dev::CUDAdrv.CuDevice) = CUDAdrv.attribute(dev, CUDAdrv.TOTAL_CONSTANT_MEMORY)
4849

@@ -71,6 +72,7 @@ let contexts = Dict{CUDAdrv.CuDevice, CUContext}(), active_device = CUDAdrv.CuDe
7172
end
7273

7374
function GPUArrays.init(dev::CUDAdrv.CuDevice)
75+
GPUArrays.setbackend!(CUBackend)
7476
if isempty(active_device)
7577
push!(active_device, dev)
7678
else

src/backends/opencl/opencl.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ devices() = cl.devices()
4242
is_gpu(dev::cl.Device) = cl.info(dev, :device_type) == :gpu
4343
is_cpu(dev::cl.Device) = cl.info(dev, :device_type) == :cpu
4444

45-
name(dev::cl.Device) = cl.info(dev, :name)
45+
name(dev::cl.Device) = string("CL ", cl.info(dev, :name))
4646

4747
threads(dev::cl.Device) = cl.info(dev, :max_work_group_size) |> Int
4848
blocks(dev::cl.Device) = cl.info(dev, :max_work_item_size)
@@ -67,6 +67,7 @@ let contexts = Dict{cl.Device, CLContext}(), active_device = cl.Device[]
6767
end
6868
end
6969
function GPUArrays.init(dev::cl.Device)
70+
GPUArrays.setbackend!(CLBackend)
7071
if isempty(active_device)
7172
push!(active_device, dev)
7273
else
File renamed without changes.

src/backends/julia/julia.jl renamed to src/backends/threaded/threaded.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ let contexts = Dict{JLDevice, JLContext}(), active_device = JLDevice[]
3232
end
3333
current_context() = contexts[current_device()]
3434
function GPUArrays.init(dev::JLDevice)
35+
GPUArrays.setbackend!(JLBackend)
3536
if isempty(active_device)
3637
push!(active_device, dev)
3738
else
@@ -183,7 +184,7 @@ linear_index(A::AbstractArray, state) = state
183184
return
184185
end
185186
end
186-
function gpu_call(f, A::JLArray, args, globalsize = length(A), local_size = 0)
187+
function gpu_call(f, A::JLArray, args::Tuple, globalsize = length(A), local_size = 0)
187188
unpacked_args = unpack_buffer.(args)
188189
n = nthreads(A)
189190
len = prod(globalsize)

src/broadcast.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ end
5252
_broadcast!(f, C, keeps, Idefaults, A, Bs, Val{N}, iter)
5353
return C
5454
end
55+
function broadcast_t(f::Any, ::Type{Any}, ::Any, ::Any, A::GPUArrays.AbstractAccArray, args::Vararg{Any,N}) where N
56+
error("Return type couldn't be inferred for broadcast. Func: $f, $(typeof(A)), $args")
57+
end
5558

5659
function _broadcast!(
5760
func, out::AbstractAccArray,
@@ -67,6 +70,8 @@ function _broadcast!(
6770
out
6871
end
6972

73+
74+
7075
function Base.foreach(func, over::AbstractAccArray, Bs...)
7176
shape = Cuint.(size(over))
7277
keeps, Idefaults = map_newindexer(shape, over, Bs)

test/cudanative.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
using GPUArrays
22
using GPUArrays: free
33
using CUDAnative, Base.Test
4-
cuctx = GPUArrays.init(:cudanative)
4+
5+
cuctx = cudanative()
56
const cu = CUDAnative
67

78
# more complex function for broadcast

test/opencl.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
using Base.Test
22
using GPUArrays
33
using GPUArrays: free
4-
ctx = CLBackend.init()
4+
5+
ctx = opencl()
56

67
# more complex function for broadcast
78
function test{T}(a::T, b)

test/runtests.jl

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#TODO remove before merging
2-
Pkg.checkout("Transpiler")
31
using GPUArrays
42
using Base.Test
53
srand(42) # set random seed for reproducibility
@@ -9,6 +7,8 @@ function jltest(a, b)
97
y*10f0
108
end
119

10+
11+
1212
function log_gpu_mem()
1313
if :cudanative in supported_backends()
1414
info("GPUMem: ", CUDAdrv.Mem.used() / 10^6)
@@ -17,13 +17,13 @@ function log_gpu_mem()
1717
end
1818
end
1919

20-
macro allbackends(title, backendname::Symbol, block)
20+
macro allbackends(title, ctxname::Symbol, block)
2121
quote
2222
for device in GPUArrays.all_devices()
2323
dname = GPUArrays.name(device)
2424
@testset "$($(esc(title))) $dname" begin
2525
ctx = GPUArrays.init(device)
26-
$(esc(backendname)) = ctx
26+
$(esc(ctxname)) = ctx
2727
$(esc(block))
2828
end
2929
log_gpu_mem()
@@ -35,9 +35,10 @@ end
3535
include("broadcast.jl")
3636
end
3737

38+
3839
# Only test supported backends!
3940
for backend in supported_backends()
40-
if backend in (:opencl, :julia, :cudanative)
41+
if backend in (:opencl, :threaded, :cudanative)
4142
@testset "$backend" begin
4243
include("$(backend).jl")
4344
end
@@ -63,6 +64,3 @@ log_gpu_mem()
6364
include("fft.jl")
6465
end
6566
log_gpu_mem()
66-
67-
68-
using GPUArrays

0 commit comments

Comments
 (0)