Skip to content

Commit 4cdcce2

Browse files
committed
more fixes
1 parent f41cf0e commit 4cdcce2

File tree

7 files changed

+109
-77
lines changed

7 files changed

+109
-77
lines changed

src/accel/bvh.jl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,8 @@ end
251251
if !ln.is_interior && ln.n_primitives > Int32(0)
252252
# Intersect ray with primitives in node.
253253
for i in Int32(0):ln.n_primitives - Int32(1)
254-
tmp_primitive = primitives[ln.offset+i]
254+
offset = ln.offset % Int32
255+
tmp_primitive = primitives[offset+i]
255256
tmp_hit, ray, tmp_interaction = intersect_p!(
256257
tmp_primitive, ray,
257258
)
@@ -293,13 +294,15 @@ end
293294

294295
to_visit_offset, current_node_i = Int32(1), Int32(1)
295296
nodes_to_visit = zeros(MVector{64,Int32})
297+
primitives = bvh.primitives
296298
@inbounds while true
297299
ln = bvh.nodes[current_node_i]
298300
if intersect_p(ln.bounds, ray, inv_dir, dir_is_neg)
299301
if !ln.is_interior && ln.n_primitives > Int32(0)
300302
for i in Int32(0):ln.n_primitives-Int32(1)
303+
offset = ln.offset % Int32
301304
intersect_p(
302-
bvh.primitives[ln.offset + i], ray,
305+
primitives[offset + i], ray,
303306
) && return true
304307
end
305308
to_visit_offset == 1 && break

src/integrators/sampler.jl

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,13 @@ function li(
8888
l = RGBSpectrum(0f0)
8989
# Find closest ray intersection or return background radiance.
9090
hit, shape, si = intersect!(scene, ray)
91+
lights = scene.lights
9192
if !hit
92-
for light in scene.lights
93-
l += le(light, ray)
93+
Base.Cartesian.@nexprs 8 i -> begin
94+
if i <= length(lights)
95+
light = lights[i]
96+
l += le(light, ray)
97+
end
9498
end
9599
return l
96100
end
@@ -112,7 +116,6 @@ function li(
112116
# Compute emitted light if ray hit an area light source.
113117
l += le(si, wo)
114118
# Add contribution of each light source.
115-
lights = scene.lights
116119
Base.Cartesian.@nexprs 8 i -> begin
117120
if i <= length(lights)
118121
light = lights[i]
@@ -129,8 +132,8 @@ function li(
129132
end
130133
if depth + 1 ≤ max_depth
131134
# Trace rays for specular reflection & refraction.
132-
# l += specular_reflect(bsdf, sampler, max_depth, ray, si, scene, depth)
133-
# l += specular_transmit(bsdf, sampler, max_depth, ray, si, scene, depth)
135+
l += specular_reflect(bsdf, sampler, max_depth, ray, si, scene, depth)
136+
l += specular_transmit(bsdf, sampler, max_depth, ray, si, scene, depth)
134137
end
135138
l
136139
end
@@ -152,7 +155,7 @@ end
152155
if !(pdf > 0f0 && !is_black(f) && abs(wi ⋅ ns) != 0f0)
153156
return RGBSpectrum(0f0)
154157
end
155-
# Compute ray differential for specular reflection.
158+
# # Compute ray differential for specular reflection.
156159
rd = RayDifferentials(spawn_ray(si, wi))
157160
if ray.has_differentials
158161
rx_origin = si.core.p + si.∂p∂x
@@ -215,7 +218,7 @@ end
215218
# intersected. Compute the relative IOR by first out by assuming
216219
# that the ray is entering the object.
217220
η = 1f0 / bsdf.η
218-
if (ns ⋅ ns) < 0
221+
if (ns ⋅ ns) < 0f0
219222
# If the ray isn't entering the object, then we need to invert
220223
# the relative IOR and negate the normal and its derivatives.
221224
η = 1f0 / η

src/materials/bsdf.jl

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ function (b::BSDF)(
121121
return output
122122
end
123123

124+
u_int32(x) = Base.unsafe_trunc(Int32, x)
125+
124126
"""
125127
Compute incident ray direction for a given outgoing direction and
126128
a given mode of light scattering corresponding
@@ -132,16 +134,16 @@ function sample_f(
132134

133135
# Choose which BxDF to sample.
134136
matching_components = num_components(b, type)
135-
matching_components == 0 && return (
137+
matching_components == Int32(0) && return (
136138
Vec3f(0f0), RGBSpectrum(0f0), 0f0, BSDF_NONE,
137139
)
138140
component = min(
139-
max(1, ceil(Int64, u[1] * matching_components)),
141+
max(Int32(1), u_int32(ceil(u[1] * matching_components))),
140142
matching_components,
141143
)
142144
# Get BxDF for chosen component.
143145
count = component
144-
component -= 1
146+
component -= Int32(1)
145147
bxdf = UberBxDF{RGBSpectrum}()
146148
bxdfs = b.bxdfs
147149
Base.Cartesian.@nexprs 8 i -> begin
@@ -177,24 +179,24 @@ function sample_f(
177179
)
178180
wi_world = local_to_world(b, wi)
179181
# Compute overall PDF with all matching BxDFs.
180-
if !(bxdf.type & BSDF_SPECULAR != 0) && matching_components > 1
182+
if !(bxdf.type & BSDF_SPECULAR != Int32(0)) && matching_components > Int32(1)
181183
Base.Cartesian.@nexprs 8 i -> begin
182184
if i <= bxdfs.last && bxdfs[i] != bxdf && bxdfs[i] & type
183185
pdf += compute_pdf(bxdfs[i], wo, wi)
184186
end
185187
end
186188
end
187-
matching_components > 1 && (pdf /= matching_components)
189+
matching_components > Int32(1) && (pdf /= matching_components)
188190
# Compute value of BSDF for sampled direction.
189-
if !(bxdf.type & BSDF_SPECULAR != 0)
190-
reflect = ((wi_world ⋅ b.ng) * (wo_world ⋅ b.ng)) > 0
191+
if !(bxdf.type & BSDF_SPECULAR != Int32(0))
192+
reflect = ((wi_world ⋅ b.ng) * (wo_world ⋅ b.ng)) > 0f0
191193
f = RGBSpectrum(0f0)
192194
Base.Cartesian.@nexprs 8 i -> begin
193195
if i <= bxdfs.last
194196
bxdf = bxdfs[i]
195197
if ((bxdf & type) && (
196-
(reflect && (bxdf.type & BSDF_REFLECTION != 0)) ||
197-
(!reflect && (bxdf.type & BSDF_TRANSMISSION != 0))
198+
(reflect && (bxdf.type & BSDF_REFLECTION != Int32(0))) ||
199+
(!reflect && (bxdf.type & BSDF_TRANSMISSION != Int32(0)))
198200
))
199201
f += bxdf(wo, wi)
200202
end
@@ -226,11 +228,11 @@ function compute_pdf(
226228
end
227229

228230
@inline function num_components(b::BSDF, flags::UInt8)::Int64
229-
num = 0
231+
num = Int32(0)
230232
bxdfs = b.bxdfs
231233
Base.Cartesian.@nexprs 8 i -> begin
232234
if i <= bxdfs.last && (bxdfs[i] & flags)
233-
num += 1
235+
num += Int32(1)
234236
end
235237
end
236238
return num

src/sampler/sampler.jl

Lines changed: 9 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -25,36 +25,7 @@ function Sampler(samples_per_pixel::Integer)
2525
)
2626
end
2727

28-
using Random
29-
using RandomNumbers.Xorshifts
30-
31-
32-
const TRNG = Xoroshiro128Plus[]
33-
# Reset the per-thread random seeds to make results reproducible
34-
reseed!() = foreach(i-> Random.seed!(TRNG[i], i), 1:Threads.maxthreadid())
35-
36-
function __init__()
37-
# Instantiate 1 RNG (Random Number Generator) per thread, for performance.
38-
# This can't be done during precompilation since the number of threads isn't known then.
39-
resize!(TRNG, Threads.maxthreadid())
40-
for i in 1:Threads.nthreads()
41-
TRNG[i] = Xoroshiro128Plus(i)
42-
end
43-
nothing
44-
end
45-
46-
"Per-thread rand()"
47-
@inline function trand()
48-
@inbounds rng = TRNG[Threads.threadid()]
49-
rand(rng)
50-
end
51-
52-
@inline function trand(::Type{T}) where {T}
53-
@inbounds rng = TRNG[Threads.threadid()]
54-
rand(rng, T)
55-
end
56-
57-
function get_camera_sample(sampler::AbstractSampler, p_raster::Point2f)
28+
@inline function get_camera_sample(sampler::AbstractSampler, p_raster::Point2f)
5829
p_film = p_raster .+ get_2d(sampler)
5930
time = get_1d(sampler)
6031
p_lens = get_2d(sampler)
@@ -161,11 +132,13 @@ struct UniformSampler <: AbstractSampler
161132
UniformSampler(samples_per_pixel::Integer) = new(1, samples_per_pixel)
162133
end
163134

164-
function get_camera_sample(::UniformSampler, p_raster::Point2f)
165-
@inbounds rng = TRNG[Threads.threadid()]
166-
p_film = p_raster .+ rand(rng, Point2f)
167-
p_lens = rand(rng, Point2f)
168-
CameraSample(p_film, p_lens, rand(rng, Float32))
135+
@inline rand2f() = Point2f(0.5f0, 0.5f0)
136+
137+
@inline function get_camera_sample(::UniformSampler, p_raster::Point2f)
138+
p = rand2f()
139+
p_film = Point2f(p_raster[1] + p[1], p_raster[2] + p[2])
140+
p_lens = rand2f()
141+
CameraSample(p_film, p_lens, 0f0)
169142
end
170143

171144
@inline function has_next_sample(u::UniformSampler)::Bool
@@ -178,6 +151,6 @@ end
178151
u.current_sample = 1
179152
end
180153
@inline get_1d(u::UniformSampler)::Float32 = rand(Float32)
181-
@inline get_2d(u::UniformSampler)::Point2f = rand(Point2f)
154+
@inline get_2d(u::UniformSampler)::Point2f = rand2f()
182155

183156
# include("stratified.jl")

test/cuda-mwe.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ begin
5151
end
5252

5353
@inline function get_camera_sample(p_raster::Point2)
54-
5554
p_film = p_raster .+ rand(Point2f)
5655
p_lens = rand(Point2f)
5756
Trace.CameraSample(p_film, p_lens, rand(Float32))

test/gpu-sampler.jl

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ using KernelAbstractions
55
import KernelAbstractions as KA
66
using KernelAbstractions.Extras.LoopInfo: @unroll
77

8-
# using AMDGPU
9-
# ArrayType = ROCArray
10-
using CUDA
11-
ArrayType = CuArray
8+
using AMDGPU
9+
ArrayType = ROCArray
10+
# using CUDA
11+
# ArrayType = CuArray
1212

1313
include("./../src/gpu-support.jl")
1414

@@ -57,15 +57,10 @@ begin
5757
Trace.PointLight(Vec3f(0, 0, 2), Trace.RGBSpectrum(10.0f0)),
5858
Trace.PointLight(Vec3f(0, 3, 3), Trace.RGBSpectrum(25.0f0)),
5959
)
60+
scene = Trace.Scene(lights, bvh)
6061
img = zeros(RGBf, res, res)
6162
end
6263

63-
@inline function get_camera_sample(p_raster::Point2)
64-
p_film = p_raster .+ rand(Point2f)
65-
p_lens = rand(Point2f)
66-
Trace.CameraSample(p_film, p_lens, rand(Float32))
67-
end
68-
6964
# ray = Trace.Ray(o=Point3f(0.5, 0.5, 1.0), d=Vec3f(0.0, 0.0, -1.0))
7065
# l = Trace.RGBSpectrum(0.0f0)
7166
# open("test3.llvm", "w") do io
@@ -74,12 +69,30 @@ end
7469

7570
@inline function trace_pixel(camera, scene, xy)
7671
pixel = Point2f(Tuple(xy))
77-
camera_sample = get_camera_sample(pixel)
72+
s = Trace.UniformSampler(8)
73+
camera_sample = @inline Trace.get_camera_sample(s, pixel)
7874
ray, ω = Trace.generate_ray_differential(camera, camera_sample)
7975
if ω > 0.0f0
80-
hit, shape, si = Trace.intersect!(scene, ray)
76+
l = @inline Trace.li(s, 5, ray, scene, 1)
77+
end
78+
return l
79+
end
80+
81+
@inline function trace_pixel(camera, scene, xy)
82+
pixel = Point2f(reverse(Tuple(xy)))
83+
s = Trace.UniformSampler(8)
84+
camera_sample = @inline Trace.get_camera_sample(s, pixel)
85+
ray, ω = Trace.generate_ray_differential(camera, camera_sample)
86+
l = Trace.RGBSpectrum(0.0f0)
87+
if ω > 0.0f0
88+
hit, prim, si = Trace.intersect!(scene, ray)
8189
if hit
82-
l = Trace.li(Trace.UniformSampler(8), 5, ray, scene, 1)
90+
m = Trace.get_material(scene, prim)
91+
bsdf = m(si, false, Trace.Radiance)
92+
l = Trace.specular_reflect(
93+
bsdf, s, 8, ray,
94+
si, scene, 8,
95+
)
8396
end
8497
end
8598
return l
@@ -91,6 +104,7 @@ end
91104
l = trace_pixel(camera, scene, xy)
92105
@inbounds img[xy] = RGBf(l.c...)
93106
end
107+
nothing
94108
end
95109

96110
function launch_trace_image!(img, camera, scene)
@@ -108,4 +122,44 @@ gpu_img = ArrayType(zeros(RGBf, res, res));
108122
# @btime launch_trace_image!(img, cam, bvh, lights);
109123
# @btime launch_trace_image!(gpu_img, cam, gpu_bvh, lights);
110124
launch_trace_image!(gpu_img, cam, gpu_scene);
125+
Array(gpu_img)
126+
111127
launch_trace_image!(img, cam, scene)
128+
GLMakie.activate!(float=true)
129+
image(img)
130+
131+
ray = Trace.RayDifferentials(Trace.Ray(o=Point3f(0.5, 0.5, 1.0), d=Vec3f(0.0, 0.0, -1.0)))
132+
open("le-wt.jl", "w") do io
133+
code_warntype(io, Trace.li, typeof.((Trace.UniformSampler(8), 5, ray, scene, 1)))
134+
end
135+
136+
137+
function launch_trace_image_ir!(img, camera, scene)
138+
backend = KA.get_backend(img)
139+
kernel! = ka_trace_image!(backend)
140+
open("test.ir", "w") do io
141+
try
142+
@device_code_llvm io begin
143+
kernel!(img, camera, scene, ndrange=size(img), workgroupsize=(16, 16))
144+
end
145+
catch e
146+
println(e)
147+
end
148+
end
149+
KA.synchronize(backend)
150+
return img
151+
end
152+
launch_trace_image_ir!(gpu_img, cam, gpu_scene);
153+
154+
code_llvm(Trace.intersect!, (typeof(bvh), Trace.RayDifferentials))
155+
156+
157+
function trace_image!(img, camera, scene)
158+
for xy in CartesianIndices(size(img))
159+
@inbounds img[xy] = RGBf(trace_pixel(camera, scene, xy).c...)
160+
end
161+
return img
162+
end
163+
164+
165+
@time launch_trace_image!(img, cam, scene)

test/gpu-threading-benchmarks.jl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,11 @@ end
7474

7575
@inline function trace_pixel(camera, scene, xy)
7676
pixel = Point2f(Tuple(xy))
77-
camera_sample = get_camera_sample(pixel)
77+
s = Trace.UniformSampler(8)
78+
camera_sample = @inline Trace.get_camera_sample(s, pixel)
7879
ray, ω = Trace.generate_ray_differential(camera, camera_sample)
7980
if ω > 0.0f0
80-
hit, shape, si = Trace.intersect!(scene, ray)
81-
if hit
82-
l = Trace.li(Trace.UniformSampler(8), 5, ray, scene, 1)
83-
end
81+
l = @inline Trace.li(s, 5, ray, scene, 1)
8482
end
8583
return l
8684
end
@@ -147,9 +145,9 @@ Array(gpu_img)
147145
# workgroupsize=(16,16)
148146
# 31.022 ms (35 allocations: 5.89 KiB)
149147

150-
function trace_image!(img, camera, bvh)
148+
function trace_image!(img, camera, scene)
151149
for xy in CartesianIndices(size(img))
152-
@inbounds img[xy] = trace_pixel(camera, bvh, xy)
150+
@inbounds img[xy] = RGBf(trace_pixel(camera, scene, xy).c...)
153151
end
154152
return img
155153
end

0 commit comments

Comments
 (0)