Skip to content
This repository was archived by the owner on Sep 27, 2021. It is now read-only.

Commit f780bab

Browse files
committed
fix bugs and #5
1 parent 534f164 commit f780bab

File tree

4 files changed

+104
-16
lines changed

4 files changed

+104
-16
lines changed

src/array.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ import Base: pointer, similar, size, copy!, convert
99
using Base: RefValue
1010

1111
mutable struct CLArray{T, N} <: GPUArray{T, N}
12-
ptr::OwnedPtr{T}
1312
size::NTuple{N, Cuint}
13+
ptr::OwnedPtr{T}
1414
end
1515

1616

1717
# arguments are swapped to not override default constructor
18-
function (::Type{CLArray{T, N}})(size::NTuple{N, Integer}, ptr::OwnedPtr{T}) where {T, N}
19-
arr = CLArray{T, N}(ptr, size)
18+
function (::Type{CLArray{T, N}})(ptr::OwnedPtr{T}, size::NTuple{N, Integer}) where {T, N}
19+
arr = CLArray{T, N}(size, ptr)
2020
finalizer(arr, unsafe_free!)
2121
arr
2222
end
@@ -32,14 +32,14 @@ module Shorthands
3232
cl(xs::AbstractArray) = isbits(xs) ? xs : CLArrays.CLArray(xs)
3333
end
3434

35-
function (::Type{CLArray{T, N}})(size::NTuple{N, Integer}, ctx = global_context()) where {T, N}
35+
function (::Type{CLArray{T, N}})(size::NTuple{N, Integer}, ctx::cl.Context = global_context()) where {T, N}
3636
# element type has different padding from cl type in julia
3737
# for fixedsize arrays we use vload/vstore, so we can use it packed
38-
clT = !Transpiler.is_fixedsize_array(T) ? cl.packed_convert(T) : T
38+
clT = T #!Transpiler.is_fixedsize_array(T) ? cl.packed_convert(T) : T
3939
elems = prod(size)
4040
elems = elems == 0 ? 1 : elems # OpenCL can't allocate 0 sized buffers
4141
ptr = Mem.alloc(clT, elems, ctx)
42-
CLArray{clT, N}(size, ptr)
42+
CLArray{clT, N}(ptr, size)
4343
end
4444

4545
raw_print(msg::AbstractString...) =

src/compilation.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,15 @@ function contains_pointer(
8484
push!(pointer_fields, (parent_field..., fname))
8585
hasptr = true
8686
else
87-
_hasptr, pointer_fields = contains_pointer(FT, (fname,), pointer_fields)
87+
_hasptr, pointer_fields = contains_pointer(FT, (parent_field..., fname), pointer_fields)
8888
hasptr |= _hasptr
8989
end
9090
end
9191
hasptr, pointer_fields
9292
end
9393

9494
get_fields_type(T, fields::Tuple{X}) where X = fieldtype(T, first(fields))
95-
get_fields_type(T, fields::Tuple{Vararg{Any, N}}) where N = get_fields_type(fieldtype(T, first(fields)), Base.tail(fields))
95+
get_fields_type(T, fields::NTuple{N, Any}) where N = get_fields_type(fieldtype(T, first(fields)), Base.tail(fields))
9696

9797
get_fields(x, fields::NTuple{1}) = getfield(x, first(fields))
9898
get_fields(x, fields::NTuple{N, Any}) where N = get_fields(getfield(x, first(fields)), Base.tail(fields))
@@ -250,6 +250,7 @@ function CLFunction(f::F, args::T, ctx = global_context()) where {T, F}
250250
get!(compiled_functions, (ctx.id, f, cltypes)) do # TODO make this faster
251251
method = CLMethod((f, cltypes))
252252
source, fname, ptr_extract = assemble_kernel(method)
253+
# println(source)
253254
options = "-cl-denorms-are-zero -cl-mad-enable -cl-unsafe-math-optimizations"
254255
if version > v"1.2"
255256
options *= " -cl-std=CL1.2"

src/ondevice.jl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ using GPUArrays: AbstractDeviceArray
1414
Array type on the device
1515
"""
1616
struct DeviceArray{T, N, Ptr} <: AbstractDeviceArray{T, N}
17-
ptr::Ptr
1817
size::NTuple{N, Cuint}
18+
ptr::Ptr
1919
end
2020
# shenanigans for uploading CLArrays to OpenCL as a DeviceArray
2121
# (spoiler alert: they can't contain pointers while uploading, but can on the device)
@@ -52,11 +52,13 @@ function setindex!(x::OnDeviceArray{T, N}, val, i::Vararg{Integer, N}) where {T,
5252
end
5353

5454

55+
predevice_type(::Type{T}) where T = T
56+
5557

56-
kernel_convert(A::CLArray{T, N}) where {T, N} = PreDeviceArray{T, N}(HostPtr{T}(), A.size)
58+
kernel_convert(A::CLArray{T, N}) where {T, N} = PreDeviceArray{T, N}(A.size, HostPtr{T}())
5759
predevice_type(::Type{GlobalArray{T, N}}) where {T, N} = PreDeviceArray{T, N}
5860
device_type(::CLArray{T, N}) where {T, N} = GlobalArray{T, N}
59-
reconstruct(x::PreDeviceArray{T, N}, ptr::GlobalPointer{T}) where {T, N} = GlobalArray{T, N}(ptr, x.size)
61+
reconstruct(x::PreDeviceArray{T, N}, ptr::GlobalPointer{T}) where {T, N} = GlobalArray{T, N}(x.size, ptr)
6062

6163
# some converts to inline CLArrays into tuples and refs
6264
kernel_convert(x::RefValue{T}) where T <: CLArray = RefValue(kernel_convert(x[]))
@@ -123,8 +125,8 @@ synchronize_threads(::KernelState) = cli.barrier(CLK_LOCAL_MEM_FENCE)
123125
LocalMemory(state::KernelState, ::Type{T}, ::Val{N}, ::Val{C}) where {T, N, C} = Transpiler.cli.LocalPointer{T}()
124126

125127
function (::Type{AbstractDeviceArray})(ptr::PtrT, shape::Vararg{Integer, N}) where PtrT <: Transpiler.cli.LocalPointer{T} where {T, N}
126-
DeviceArray{T, N, PtrT}(ptr, shape)
128+
DeviceArray{T, N, PtrT}(shape, ptr)
127129
end
128130
function (::Type{AbstractDeviceArray})(ptr::PtrT, shape::NTuple{N, Integer}) where PtrT <: Transpiler.cli.LocalPointer{T} where {T, N}
129-
DeviceArray{T, N, PtrT}(ptr, shape)
131+
DeviceArray{T, N, PtrT}(shape, ptr)
130132
end

test/runtests.jl

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
using CLArrays
22
using GPUArrays.TestSuite, Base.Test
3+
34
for dev in CLArrays.devices()
45
CLArrays.init(dev)
56
@testset "Device: $dev" begin
7+
68
TestSuite.run_tests(CLArray)
79

810
@testset "muladd & abs" begin
@@ -31,15 +33,98 @@ for dev in CLArrays.devices()
3133
end
3234
end
3335

34-
# The above is equal to:
35-
# Typ = CuArray
36+
# #The above is equal to:
37+
# Typ = CLArray
3638
# GPUArrays.allowslow(false)
3739
# TestSuite.run_gpuinterface(Typ)
3840
# TestSuite.run_base(Typ)
3941
# TestSuite.run_blas(Typ)
40-
# TestSuite.run_broadcasting(Typ)
42+
# TestSuite.run_broadcasting(CLArray)
4143
# TestSuite.run_construction(Typ)
4244
# TestSuite.run_fft(Typ)
4345
# TestSuite.run_linalg(Typ)
4446
# TestSuite.run_mapreduce(Typ)
4547
# TestSuite.run_indexing(Typ)
48+
#
49+
# function test_sizes6(state, out)
50+
# x1 = (1, 2, 3)
51+
# out[1] = sizeof(x1)
52+
#
53+
# x2 = (1f0, 2f0, 3f0)
54+
# out[2] = sizeof(x2)
55+
#
56+
# x3 = ((1f0, 2f0), 1.0)
57+
# out[3] = sizeof(x3)
58+
#
59+
# x4 = ((1f0, 2f0, 3f0), 1.0)
60+
# out[4] = sizeof(x4)
61+
#
62+
# x5 = ((1f0, 2f0, 3f0, 4f0), 1.0)
63+
# out[5] = sizeof(x5)
64+
#
65+
# x6 = (1.0, (1f0, 2f0))
66+
# out[6] = sizeof(x6)
67+
#
68+
# x7 = (1.0, (1f0, 2f0, 3f0))
69+
# out[7] = sizeof(x7)
70+
#
71+
# x8 = (1.0, (1f0, 2f0, 3f0, 4f0))
72+
# out[8] = sizeof(x8)
73+
#
74+
# x9 = ((1f0, 2f0), UInt32(1), 1.0)
75+
# out[9] = sizeof(x9)
76+
#
77+
# return
78+
# end
79+
#
80+
# function test_sizes6(state, out)
81+
# x1 = 1
82+
# out[1] = sizeof(x1)
83+
#
84+
# x2 = (1, 2)
85+
# out[2] = sizeof(x2)
86+
#
87+
# x3 = (1, 2, 3)
88+
# out[3] = sizeof(x3)
89+
#
90+
# x4 = 1f0
91+
# out[4] = sizeof(x4)
92+
#
93+
# x5 = (1f0, 2f0)
94+
# out[5] = sizeof(x5)
95+
#
96+
# x6 = (1f0, 2f0, 3f0)
97+
# out[6] = sizeof(x6)
98+
#
99+
# x7 = (1f0, 2f0, 3f0, 4f0, 5f0)
100+
# out[7] = sizeof(x7)
101+
#
102+
# x8 = (1f0, 2f0, 3f0, 4f0, 5f0, 6f0)
103+
# out[8] = sizeof(x8)
104+
#
105+
# x9 = (1, 2, 3, 4, 5, 6)
106+
# out[9] = sizeof(x9)
107+
#
108+
# x10 = UInt8(1)
109+
# out[10] = sizeof(x10)
110+
#
111+
# x11 = (x10, x10)
112+
# out[11] = sizeof(x11)
113+
#
114+
# x12 = (x10, x10, x10)
115+
# out[12] = sizeof(x12)
116+
#
117+
#
118+
# x13 = CLArrays.DeviceArray{Complex{Float64},3,CLArrays.HostPtr{Complex{Float64}}}(
119+
# CLArrays.HostPtr{Complex{Float64}}(),
120+
# (Cuint(2), Cuint(2), Cuint(2))
121+
# )
122+
# out[13] = sizeof(x13)
123+
# #
124+
# # x14 = (x13, x13)
125+
# # out[14] = sizeof(x14)
126+
# #
127+
# # x15 = (x13, x13, x13)
128+
# # out[15] = sizeof(x15)
129+
# return
130+
# end

0 commit comments

Comments
 (0)