Skip to content

Commit 3cd5966

Browse files
committed
Use CLPtr instead
1 parent a53eb1c commit 3cd5966

File tree

7 files changed

+34
-30
lines changed

7 files changed

+34
-30
lines changed

examples/hands_on_opencl/ex08/matmul.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ else
151151

152152
@info("=== OpenCL, matrix mult, C row, priv A, B, cols loc, order $Ndim ====")
153153

154+
ORDER = 2
155+
154156
for i in 1:COUNT
155157
fill!(h_C, 0.0)
156158
localmem = cl.LocalMem(Float32, Pdim)

examples/hands_on_opencl/ex09/pi_ocl.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ __kernel void pi(
3535

3636
for(i= istart; i<iend; i++){
3737
x = (i+0.5f)*step_size;
38+
// arctan(x)' = 1 / (1 + x^2)
39+
// pi/4 = arctan(1)
3840
accum += 4.0f/(1.0f+x*x);
3941
}
4042

examples/hands_on_opencl/ex09/pi_ocl.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ src_dir = dirname(Base.source_path())
1818

1919
#
2020
# Some constant values
21-
const INSTEPS = 512*512*512
22-
const ITERS = 262144
21+
INSTEPS = 512*512*512
22+
ITERS = 262144
2323

2424
# Set some default values:
2525
# Default number of steps (updated later to the device-preferred value)
26-
const in_nsteps = INSTEPS
26+
in_nsteps = INSTEPS
2727

2828
# Default number of iterations
29-
const niters = ITERS
29+
niters = ITERS
3030

3131
kernelsource = read(joinpath(src_dir, "pi_ocl.cl"), String)
3232
program = cl.Program(source=kernelsource) |> cl.build!

examples/hands_on_opencl/exA/pi_vocl.jl

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ if length(ARGS) < 1
2727
exit(1)
2828
end
2929
vector_size = parse(Int, ARGS[1])
30+
vector_size = 8
3031

3132
if vector_size == 1
3233
ITERS = 262144
@@ -68,13 +69,11 @@ end
6869
nwork_groups = in_nsteps ÷ (work_group_size * niters)
6970

7071
# get the max work group size for the kernel on our device
71-
if vector_size == 1
72-
max_size = cl.work_group_info(pi_kernel, cl.device()).size
73-
elseif vector_size == 4
74-
max_size = cl.work_group_info(pi_kernel, cl.device()).size
75-
elseif vector_size == 8
76-
max_size = cl.work_group_info(pi_kernel, cl.device()).size
77-
end
72+
max_size = cl.work_group_info(pi_kernel, cl.device()).size
73+
cl.work_group_info(pi_kernel, cl.device()).prefered_size_multiple
74+
cl.work_group_info(pi_kernel, cl.device()).private_mem_size
75+
cl.work_group_info(pi_kernel, cl.device()).local_mem_size
76+
cl.work_group_info(pi_kernel, cl.device()).compile_size
7877

7978
if max_size > work_group_size
8079
work_group_size = max_size

examples/notebooks/Transpose.ipynb

Lines changed: 17 additions & 17 deletions
Large diffs are not rendered by default.

examples/performance.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,6 @@ end
106106
# N_WORKERS has to be less than or equal to the device's max work group size
107107
# ex. N_WORKERS = 1 is non parallel execution on the gpu
108108

109-
const N_DATA_PTS = Int(2^23) # ~8 million
110-
const N_WORKERS = Int(2^7)
109+
const N_DATA_PTS = Int(2^2) # 4 data points (previously 2^23, ~8 million)
110+
const N_WORKERS = Int(2^2)
111111
cl_performance(N_DATA_PTS, N_WORKERS)

test/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
33
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
44
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
55
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
6+
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
67
IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
78
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
89
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"

0 commit comments

Comments
 (0)