Skip to content

Commit 4c5d2a8

Browse files
committed
performance improvements
1 parent afef8f2 commit 4c5d2a8

File tree

10 files changed

+242
-149
lines changed

10 files changed

+242
-149
lines changed

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
99
GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326"
1010
ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
1111
ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19"
12+
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
1213
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1314
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
1415
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -22,6 +23,7 @@ FileIO = "1.16"
2223
GeometryBasics = "0.4"
2324
ImageCore = "0.10"
2425
ImageIO = "0.6"
26+
KernelAbstractions = "0.9.24"
2527
ProgressMeter = "1.10"
2628
RandomNumbers = "1.6.0"
2729
StaticArrays = "1.9.7"

src/Trace.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ using StaticArrays
99
using ProgressMeter
1010
using StructArrays
1111
using Atomix
12+
using KernelAbstractions
1213

1314
abstract type AbstractRay end
1415
abstract type Spectrum end

src/accel/bvh.jl

Lines changed: 21 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -230,53 +230,27 @@ function _unroll(
230230
end
231231

232232
@inline function world_bound(bvh::BVHAccel)::Bounds3
233-
length(bvh.nodes) > 0 ? bvh.nodes[1].bounds : Bounds3()
234-
end
235-
236-
macro ntuple(N, value)
237-
expr = :(())
238-
for i in 1:N
239-
push!(expr.args, :($(esc(value))))
240-
end
241-
return expr
242-
end
243-
244-
macro setindex(N, setindex_expr)
245-
@assert Meta.isexpr(setindex_expr, :(=))
246-
index_expr = setindex_expr.args[1]
247-
@assert Meta.isexpr(index_expr, :ref)
248-
tuple = index_expr.args[1]
249-
idx = index_expr.args[2]
250-
value = setindex_expr.args[2]
251-
expr = :(())
252-
for i in 1:N
253-
push!(expr.args, :(ifelse($i != $(esc(idx)), $(esc(tuple))[$i], $(esc(value)))))
254-
end
255-
return :($(esc(tuple)) = $expr)
233+
length(bvh.nodes) > Int32(0) ? bvh.nodes[1].bounds : Bounds3()
256234
end
257235

258236
@inline function intersect!(bvh::BVHAccel{P}, ray::AbstractRay) where {P}
259237
hit = false
260238
interaction = SurfaceInteraction()
261-
262239
ray = check_direction(ray)
263240
inv_dir = 1f0 ./ ray.d
264241
dir_is_neg = is_dir_negative(ray.d)
265242

266243
to_visit_offset, current_node_i = Int32(1), Int32(1)
267-
# Tuple version is 2us slower, which makes the total rendering time go from 5s to 7s.
268-
# no other way to do this on the GPU though, is there?
269-
nodes_to_visit = @ntuple 64 Int32(0)
270-
# nodes_to_visit = bvh.nodes_to_visit[Threads.threadid()]
244+
nodes_to_visit = zeros(MVector{64,Int32})
271245
primitives = bvh.primitives
272-
primitive = first(primitives)
246+
@inbounds primitive = primitives[1]
273247
nodes = bvh.nodes
274248
@inbounds while true
275249
ln = nodes[current_node_i]
276250
if intersect_p(ln.bounds, ray, inv_dir, dir_is_neg)
277-
if !(ln.is_interior) && ln.n_primitives > 0
251+
if !ln.is_interior && ln.n_primitives > Int32(0)
278252
# Intersect ray with primitives in node.
279-
for i in 0:ln.n_primitives-1
253+
for i in Int32(0):ln.n_primitives - Int32(1)
280254
tmp_primitive = primitives[ln.offset+i]
281255
tmp_hit, ray, tmp_interaction = intersect_p!(
282256
tmp_primitive, ray,
@@ -287,17 +261,15 @@ end
287261
primitive = tmp_primitive
288262
end
289263
end
290-
to_visit_offset == 1 && break
264+
to_visit_offset == Int32(1) && break
291265
to_visit_offset -= Int32(1)
292266
current_node_i = nodes_to_visit[to_visit_offset]
293267
else
294-
if dir_is_neg[ln.split_axis] == 2
295-
@setindex 64 nodes_to_visit[to_visit_offset] = Int32(current_node_i + 1)
296-
# nodes_to_visit[to_visit_offset] = Int32(current_node_i + 1)
268+
if dir_is_neg[ln.split_axis] == Int32(2)
269+
nodes_to_visit[to_visit_offset] = current_node_i + Int32(1)
297270
current_node_i = Int32(ln.offset)
298271
else
299-
@setindex 64 nodes_to_visit[to_visit_offset] = Int32(ln.offset)
300-
# nodes_to_visit[to_visit_offset] = Int32(ln.offset)
272+
nodes_to_visit[to_visit_offset] = ln.offset % Int32
301273
current_node_i += Int32(1)
302274
end
303275
to_visit_offset += Int32(1)
@@ -313,41 +285,40 @@ end
313285

314286
@inline function intersect_p(bvh::BVHAccel, ray::AbstractRay)
315287

316-
length(bvh.nodes) == 0 && return false
288+
length(bvh.nodes) == Int32(0) && return false
317289

318290
ray = check_direction(ray)
319291
inv_dir = 1f0 ./ ray.d
320292
dir_is_neg = is_dir_negative(ray.d)
321293

322294
to_visit_offset, current_node_i = Int32(1), Int32(1)
323-
nodes_to_visit = @ntuple 64 Int32(0)
324-
# nodes_to_visit = bvh.nodes_to_visit[Threads.threadid()]
295+
nodes_to_visit = zeros(MVector{64,Int32})
325296
while true
326297
ln = bvh.nodes[current_node_i]
327298
if intersect_p(ln.bounds, ray, inv_dir, dir_is_neg)
328-
if !ln.is_interior && ln.n_primitives > 0
329-
for i in 0:ln.n_primitives-1
299+
if !ln.is_interior && ln.n_primitives > Int32(0)
300+
for i in Int32(0):ln.n_primitives-Int32(1)
330301
intersect_p(
331-
bvh.primitives[ln.offset+i], ray,
302+
bvh.primitives[ln.offset + i], ray,
332303
) && return true
333304
end
334305
to_visit_offset == 1 && break
335306
to_visit_offset -= Int32(1)
336307
current_node_i = nodes_to_visit[to_visit_offset]
337308
else
338-
if dir_is_neg[ln.split_axis] == 2
339-
@setindex 64 nodes_to_visit[to_visit_offset] = Int32(current_node_i + 1)
340-
# nodes_to_visit[to_visit_offset] = Int32(current_node_i + 1)
341-
current_node_i = Int32(ln.offset)
309+
if dir_is_neg[ln.split_axis] == Int32(2)
310+
# Old tuple-based form (the @setindex macro no longer exists): @setindex 64 nodes_to_visit[to_visit_offset] = Int32(current_node_i + 1)
311+
nodes_to_visit[to_visit_offset] = current_node_i + Int32(1)
312+
current_node_i = ln.offset % Int32
342313
else
343-
@setindex 64 nodes_to_visit[to_visit_offset] = Int32(ln.offset)
344-
# nodes_to_visit[to_visit_offset] = Int32(ln.offset)
314+
# Old tuple-based form (the @setindex macro no longer exists): @setindex 64 nodes_to_visit[to_visit_offset] = Int32(ln.offset)
315+
nodes_to_visit[to_visit_offset] = ln.offset % Int32
345316
current_node_i += Int32(1)
346317
end
347318
to_visit_offset += Int32(1)
348319
end
349320
else
350-
to_visit_offset == 1 && break
321+
to_visit_offset == Int32(1) && break
351322
to_visit_offset -= Int32(1)
352323
current_node_i = Int32(nodes_to_visit[to_visit_offset])
353324
end

src/bounds.jl

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -185,20 +185,22 @@ dir_is_negative: 1 -- false, 2 -- true
185185
b::Bounds3, ray::AbstractRay,
186186
inv_dir::Vec3f, dir_is_negative::Point3{UInt8},
187187
)::Bool
188-
tx_min = (b[dir_is_negative[1]][1] - ray.o[1]) * inv_dir[1]
189-
tx_max = (b[3-dir_is_negative[1]][1] - ray.o[1]) * inv_dir[1]
190-
ty_min = (b[dir_is_negative[2]][2] - ray.o[2]) * inv_dir[2]
191-
ty_max = (b[3-dir_is_negative[2]][2] - ray.o[2]) * inv_dir[2]
192-
193-
(tx_min > ty_max || ty_min > tx_max) && return false
194-
ty_min > tx_min && (tx_min = ty_min)
195-
ty_max > tx_max && (tx_max = ty_max)
196-
197-
tz_min = (b[dir_is_negative[3]][3] - ray.o[3]) * inv_dir[3]
198-
tz_max = (b[3-dir_is_negative[3]][3] - ray.o[3]) * inv_dir[3]
199-
(tx_min > tz_max || tz_min > tx_max) && return false
200-
201-
(tz_min > tx_min) && (tx_min = tz_min)
202-
(tz_max < tx_max) && (tx_max = tz_max)
203-
tx_min < ray.t_max && tx_max > 0
188+
@inbounds begin
189+
tx_min = (b[dir_is_negative[1]][1] - ray.o[1]) * inv_dir[1]
190+
tx_max = (b[3-dir_is_negative[1]][1] - ray.o[1]) * inv_dir[1]
191+
ty_min = (b[dir_is_negative[2]][2] - ray.o[2]) * inv_dir[2]
192+
ty_max = (b[3-dir_is_negative[2]][2] - ray.o[2]) * inv_dir[2]
193+
194+
(tx_min > ty_max || ty_min > tx_max) && return false
195+
ty_min > tx_min && (tx_min = ty_min)
196+
ty_max > tx_max && (tx_max = ty_max)
197+
198+
tz_min = (b[dir_is_negative[3]][3] - ray.o[3]) * inv_dir[3]
199+
tz_max = (b[3-dir_is_negative[3]][3] - ray.o[3]) * inv_dir[3]
200+
(tx_min > tz_max || tz_min > tx_max) && return false
201+
202+
(tz_min > tx_min) && (tx_min = tz_min)
203+
(tz_max < tx_max) && (tx_max = tz_max)
204+
return tx_min < ray.t_max && tx_max > 0f0
205+
end
204206
end

src/gpu-support.jl

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,54 @@
1-
to_gpu(ArrayType, m::AbstractArray) = AMDGPU.rocconvert(ArrayType(m))
1+
import KernelAbstractions as KA
22

3-
function to_gpu(ArrayType, m::Trace.Texture)
3+
KA.@kernel some_kernel_f() = nothing
4+
5+
function some_kernel(arr)
6+
backend = KA.get_backend(arr)
7+
return some_kernel_f(backend)
8+
end
9+
10+
function to_gpu(ArrayType, m::AbstractArray; preserve=[])
11+
arr = ArrayType(m)
12+
push!(preserve, arr)
13+
kernel = some_kernel(arr)
14+
return KA.argconvert(kernel, arr)
15+
end
16+
17+
function to_gpu(ArrayType, m::Trace.Texture; preserve=[])
418
@assert !Trace.no_texture(m)
519
return Trace.Texture(
6-
to_gpu(ArrayType, m.data),
20+
to_gpu(ArrayType, m.data; preserve=preserve),
721
m.const_value,
822
m.isconst,
923
)
1024
end
1125

12-
function to_gpu(ArrayType, m::Trace.UberMaterial)
26+
function to_gpu(ArrayType, m::Trace.UberMaterial; preserve=[])
1327
@assert !Trace.no_texture(m.Kd)
14-
Kd = to_gpu(ArrayType, m.Kd)
28+
Kd = to_gpu(ArrayType, m.Kd; preserve=preserve)
1529
no_tex_s = typeof(Kd)()
16-
f_tex = to_gpu(ArrayType, Trace.Texture(ArrayType(zeros(Float32, 1, 1))))
30+
f_tex = to_gpu(ArrayType, Trace.Texture(ArrayType(zeros(Float32, 1, 1))); preserve=preserve)
1731
no_tex_f = typeof(f_tex)()
1832
return Trace.UberMaterial(
1933
Kd,
20-
Trace.no_texture(m.Ks) ? no_tex_s : to_gpu(ArrayType, m.Ks),
21-
Trace.no_texture(m.Kr) ? no_tex_s : to_gpu(ArrayType, m.Kr),
22-
Trace.no_texture(m.Kt) ? no_tex_s : to_gpu(ArrayType, m.Kt),
23-
Trace.no_texture(m.σ) ? no_tex_f : to_gpu(ArrayType, m.σ),
24-
Trace.no_texture(m.roughness) ? no_tex_f : to_gpu(ArrayType, m.roughness),
25-
Trace.no_texture(m.u_roughness) ? no_tex_f : to_gpu(ArrayType, m.u_roughness),
26-
Trace.no_texture(m.v_roughness) ? no_tex_f : to_gpu(ArrayType, m.v_roughness),
27-
Trace.no_texture(m.index) ? no_tex_f : to_gpu(ArrayType, m.index),
34+
Trace.no_texture(m.Ks) ? no_tex_s : to_gpu(ArrayType, m.Ks; preserve=preserve),
35+
Trace.no_texture(m.Kr) ? no_tex_s : to_gpu(ArrayType, m.Kr; preserve=preserve),
36+
Trace.no_texture(m.Kt) ? no_tex_s : to_gpu(ArrayType, m.Kt; preserve=preserve),
37+
Trace.no_texture(m.σ) ? no_tex_f : to_gpu(ArrayType, m.σ; preserve=preserve),
38+
Trace.no_texture(m.roughness) ? no_tex_f : to_gpu(ArrayType, m.roughness; preserve=preserve),
39+
Trace.no_texture(m.u_roughness) ? no_tex_f : to_gpu(ArrayType, m.u_roughness; preserve=preserve),
40+
Trace.no_texture(m.v_roughness) ? no_tex_f : to_gpu(ArrayType, m.v_roughness; preserve=preserve),
41+
Trace.no_texture(m.index) ? no_tex_f : to_gpu(ArrayType, m.index; preserve=preserve),
2842
m.remap_roughness,
2943
m.type,
3044
)
3145
end
3246

3347
# Conversion constructor for e.g. GPU arrays
3448
# TODO: create the tree on the GPU? Not sure that would gain much, though...
35-
function to_gpu(ArrayType, bvh::Trace.BVHAccel)
36-
primitives = to_gpu(ArrayType, bvh.primitives)
37-
nodes = to_gpu(ArrayType, bvh.nodes)
38-
materials = to_gpu(ArrayType, to_gpu.((ArrayType,), bvh.materials))
49+
function to_gpu(ArrayType, bvh::Trace.BVHAccel; preserve=[])
50+
primitives = to_gpu(ArrayType, bvh.primitives; preserve=preserve)
51+
nodes = to_gpu(ArrayType, bvh.nodes; preserve=preserve)
52+
materials = to_gpu(ArrayType, to_gpu.((ArrayType,), bvh.materials; preserve=preserve); preserve=preserve)
3953
return Trace.BVHAccel(primitives, materials, bvh.max_node_primitives, nodes)
4054
end

src/materials/bsdf.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,12 @@ end
5656
BSDF() = BSDF{RGBSpectrum}(NaN32, Normal3f(0f0), Normal3f(0f0), Vec3f(0f0), Vec3f(0f0), BXDFVector{RGBSpectrum}())
5757

5858

59-
function BSDF(si::SurfaceInteraction, sbdfs::Vararg{UberBxDF{S}, N}) where {S<:Spectrum, N}
59+
function BSDF(si::SurfaceInteraction, sbdfs::Vararg{UberBxDF{RGBSpectrum}, N}) where {N}
6060
BSDF(si, 1f0, sbdfs...)
6161
end
6262

63-
function BSDF(si::SurfaceInteraction, η::Float32, sbdfs::Vararg{UberBxDF{S},N}) where {S<:Spectrum, N}
6463

64+
function BSDF(si::SurfaceInteraction, η::Float32, sbdfs::Vararg{UberBxDF{RGBSpectrum},N}) where {N}
6565
ng = si.core.n
6666
ns = si.shading.n
6767
ss = normalize(si.shading.∂p∂u)

src/materials/uber-material.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ end
152152
elseif s.bxdf_type === MICROFACET_TRANSMISSION
153153
return distribution_microfacet_transmission(s, wo, wi)
154154
end
155-
error("Unknown BxDF type $(s.bxdf_type)")
155+
return RGBSpectrum(0.0f0)
156+
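# NOTE(review): unknown BxDF types now return black instead of throwing (presumably so this compiles in GPU kernels — confirm); was: error("Unknown BxDF type $(s.bxdf_type)")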
156157
end
157158

158159
struct UberMaterial{STAType,FTAType} <: Material

0 commit comments

Comments
 (0)