Skip to content

Commit 57f7818

Browse files
committed
almost 0 allocations
1 parent c7cd082 commit 57f7818

File tree

8 files changed

+135
-110
lines changed

8 files changed

+135
-110
lines changed

src/accel/bvh.jl

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ struct LinearBVHInterior <: LinearNode
4747
end
4848
const LinearBVH = Union{LinearBVHLeaf,LinearBVHInterior}
4949

50-
struct BVHAccel <: AccelPrimitive
51-
primitives::Vector{P} where P<:Primitive
50+
struct BVHAccel{P <: Primitive} <: AccelPrimitive
51+
primitives::Vector{P}
5252
max_node_primitives::UInt8
5353
nodes::Vector{LinearBVH}
5454
nodes_to_visit::Vector{Vector{Int32}}
@@ -57,15 +57,15 @@ struct BVHAccel <: AccelPrimitive
5757
primitives::Vector{P}, max_node_primitives::Integer = 1,
5858
) where P<:Primitive
5959
max_node_primitives = min(255, max_node_primitives)
60-
length(primitives) == 0 && return new(primitives, max_node_primitives)
60+
isempty(primitives) && return new{P}(primitives, max_node_primitives)
6161
nodes_to_visit = [zeros(Int32, 64) for _ in 1:Threads.maxthreadid()]
6262
primitives_info = [
6363
BVHPrimitiveInfo(i, world_bound(p))
6464
for (i, p) in enumerate(primitives)
6565
]
6666

6767
total_nodes = Ref(0)
68-
ordered_primitives = Vector{P}(undef, 0)
68+
ordered_primitives = P[]
6969
root = _init(
7070
primitives, primitives_info, 1, length(primitives),
7171
total_nodes, ordered_primitives, max_node_primitives,
@@ -76,7 +76,7 @@ struct BVHAccel <: AccelPrimitive
7676
_unroll(flattened, root, offset)
7777
@real_assert total_nodes[] + 1 == offset[]
7878

79-
new(ordered_primitives, max_node_primitives, flattened, nodes_to_visit)
79+
new{P}(ordered_primitives, max_node_primitives, flattened, nodes_to_visit)
8080
end
8181
end
8282

@@ -210,19 +210,21 @@ end
210210
length(bvh.nodes) > 0 ? bvh.nodes[1].bounds : Bounds3()
211211
end
212212

213-
function intersect!(pool, bvh::BVHAccel, ray::MutableRef{<:AbstractRay})
213+
function intersect!(pool, bvh::BVHAccel{P}, ray::MutableRef{<:AbstractRay})::Tuple{Bool, P, SurfaceInteraction} where P
214214
hit = false
215215
interaction = SurfaceInteraction()
216-
primitive::Maybe{GeometricPrimitive} = nothing
217-
length(bvh.nodes) == 0 && return hit, primitive, interaction
216+
isempty(bvh.nodes) && return hit, nothing, interaction
218217

219218
check_direction!(ray)
220219
inv_dir = 1f0 ./ ray.d
221220
dir_is_neg = is_dir_negative(ray.d)
222221

223222
to_visit_offset::Int32, current_node_i::Int32 = 1, 1
224223
@inbounds nodes_to_visit = bvh.nodes_to_visit[Threads.threadid()]
225-
nodes_to_visit .= 0
224+
@inbounds for i in eachindex(nodes_to_visit)
225+
nodes_to_visit[i] = Int32(0)
226+
end
227+
primitive::P = first(bvh.primitives)
226228
@inbounds while true
227229
ln = bvh.nodes[current_node_i]
228230
if intersect_p(pool, ln.bounds, ray, inv_dir, dir_is_neg)
@@ -240,21 +242,21 @@ function intersect!(pool, bvh::BVHAccel, ray::MutableRef{<:AbstractRay})
240242
end
241243
end
242244
to_visit_offset == 1 && break
243-
to_visit_offset -= 1
245+
to_visit_offset -= Int32(1)
244246
current_node_i = nodes_to_visit[to_visit_offset]
245247
else
246248
if dir_is_neg[ln.split_axis] == 2
247249
nodes_to_visit[to_visit_offset] = current_node_i + Int32(1)
248250
current_node_i = ln.second_child_offset
249251
else
250252
nodes_to_visit[to_visit_offset] = ln.second_child_offset
251-
current_node_i += 1
253+
current_node_i += Int32(1)
252254
end
253-
to_visit_offset += 1
255+
to_visit_offset += Int32(1)
254256
end
255257
else
256258
to_visit_offset == 1 && break
257-
to_visit_offset -= 1
259+
to_visit_offset -= Int32(1)
258260
current_node_i = nodes_to_visit[to_visit_offset]
259261
end
260262
end

src/bounds.jl

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,15 @@ function area(b::Bounds2)
9090
end
9191

9292
@inline function sides(b::Union{Bounds2,Bounds3})
93-
[abs(b1 - b0) for (b1, b0) in zip(b.p_max, b.p_min)]
93+
return map(b.p_max, b.p_min) do b1, b0
94+
return abs(b1 - b0)
95+
end
9496
end
9597

9698
@inline function inclusive_sides(b::Union{Bounds2,Bounds3})
97-
[abs(b1 - (b0 - 1f0)) for (b1, b0) in zip(b.p_max, b.p_min)]
99+
return map(b.p_max, b.p_min) do b1, b0
100+
abs(b1 - (b0 - 1.0f0))
101+
end
98102
end
99103

100104
function volume(b::Bounds3)

src/film.jl

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -102,30 +102,47 @@ struct FilmTile{Pixels<:AbstractMatrix{<:FilmTilePixel}}
102102
filter_table_width::Int32
103103
pixels::Pixels
104104

105-
function FilmTile(
106-
bounds::Bounds2, filter_radius::Point2f,
107-
filter_table_width::Int32,
108-
)
109-
tile_res = (Int32.(inclusive_sides(bounds)))
110-
contrib_sum = fill(RGBSpectrum(), tile_res[2], tile_res[1])
111-
filter_weight_sum = fill(0.0f0, tile_res[2], tile_res[1])
112-
pixels = StructArray{FilmTilePixel{RGBSpectrum}}((contrib_sum, filter_weight_sum))
113-
new{typeof(pixels)}(
114-
bounds, filter_radius, 1f0 ./ filter_radius,
115-
filter_table_width,
116-
pixels,
117-
)
118-
end
105+
end
106+
107+
function FilmTile(
108+
bounds::Bounds2, filter_radius::Point2f,
109+
filter_table_width::Int32,
110+
)
111+
tile_res = (Int32.(inclusive_sides(bounds))) .+ 2
112+
contrib_sum = fill(RGBSpectrum(), tile_res[2], tile_res[1])
113+
filter_weight_sum = fill(0.0f0, tile_res[2], tile_res[1])
114+
pixels = StructArray{FilmTilePixel{RGBSpectrum}}((contrib_sum, filter_weight_sum))
115+
FilmTile{typeof(pixels)}(
116+
bounds, filter_radius, 1.0f0 ./ filter_radius,
117+
filter_table_width,
118+
pixels,
119+
)
119120
end
120121

121122
"""
122123
Bounds should start from 1 not 0.
123124
"""
124-
function FilmTile(f::Film, sample_bounds::Bounds2)
125-
p0 = ceil.(sample_bounds.p_min .- 0.5f0 .- f.filter.radius)
126-
p1 = floor.(sample_bounds.p_max .- 0.5f0 .+ f.filter.radius) .+ 1f0
125+
function FilmTile(f::Film, sample_bounds::Bounds2, radius)
126+
p0 = ceil.(sample_bounds.p_min .- 0.5f0 .- radius)
127+
p1 = floor.(sample_bounds.p_max .- 0.5f0 .+ radius) .+ 1f0
127128
tile_bounds = Bounds2(p0, p1) ∩ f.crop_bounds
128-
FilmTile(tile_bounds, f.filter.radius, f.filter_table_width)
129+
FilmTile(tile_bounds, radius, f.filter_table_width)
130+
end
131+
132+
function reset!(tile::FilmTile)
133+
tile.pixels.contrib_sum .= (RGBSpectrum(0f0),)
134+
tile.pixels.filter_weight_sum .= 0f0
135+
end
136+
137+
function update_bounds!(f::Film, tile::FilmTile, sample_bounds::Bounds2)
138+
reset!(tile)
139+
radius = tile.filter_radius
140+
p0 = ceil.(sample_bounds.p_min .- 0.5f0 .- radius)
141+
p1 = floor.(sample_bounds.p_max .- 0.5f0 .+ radius) .+ 1.0f0
142+
bounds = Bounds2(p0, p1) ∩ f.crop_bounds
143+
tile_res = (Int32.(inclusive_sides(bounds)))
144+
@assert all(reverse(tile_res) .<= size(tile.pixels)) "$(reverse(tile_res)) != $(size(tile.pixels)) $(sample_bounds)"
145+
FilmTile(bounds, radius, tile.inv_filter_radius, tile.filter_table_width, tile.pixels)
129146
end
130147

131148
function filter_offset!(offsets, start, stop, discrete_point, inv_filter_radius, filter_table_width)

src/integrators/sampler.jl

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,78 @@
11
abstract type SamplerIntegrator <: Integrator end
22

3-
struct WhittedIntegrator <: SamplerIntegrator
4-
camera::C where C<:Camera
5-
sampler::S where S<:AbstractSampler
3+
struct WhittedIntegrator{C<: Camera, S <: AbstractSampler} <: SamplerIntegrator
4+
camera::C
5+
sampler::S
66
max_depth::Int64
77
end
88

9+
@noinline function sample_kernel_inner(pool, i, scene, t_sampler, film, film_tile, camera, pixel, spp_sqr)
10+
while has_next_sample(t_sampler)
11+
free_all(pool) # clear memory pool
12+
camera_sample = get_camera_sample(t_sampler, pixel)
13+
ray, ω = generate_ray_differential(pool, camera, camera_sample)
14+
scale_differentials!(ray, spp_sqr)
15+
l = RGBSpectrum(0f0)
16+
if ω > 0.0f0
17+
l = li(pool, i, ray, scene, 1)
18+
end
19+
# TODO check l for invalid values
20+
if isnan(l)
21+
l = RGBSpectrum(0f0)
22+
end
23+
add_sample!(film, film_tile, camera_sample.film, l, ω)
24+
start_next_sample!(t_sampler)
25+
end
26+
end
27+
28+
@noinline function sample_kernel(mempools, i, camera, scene, film, film_tile, tile_bounds)
29+
pool = mempools[Threads.threadid()]
30+
t_sampler = deepcopy(i.sampler)
31+
spp_sqr = 1f0 / Float32(t_sampler.samples_per_pixel)
32+
for pixel in tile_bounds
33+
start_pixel!(t_sampler, pixel)
34+
sample_kernel_inner(pool, i, scene, t_sampler, film, film_tile, camera, pixel, spp_sqr)
35+
end
36+
merge_film_tile!(film, film_tile)
37+
end
938

1039
"""
1140
Render scene.
1241
"""
1342
function (i::SamplerIntegrator)(scene::Scene)
14-
1543
sample_bounds = get_sample_bounds(get_film(i.camera))
1644
sample_extent = diagonal(sample_bounds)
1745
tile_size = 16
18-
n_tiles::Point2 = Int64.(floor.((sample_extent .+ tile_size) ./ tile_size))
19-
46+
n_tiles = Int64.(floor.((sample_extent .+ tile_size) ./ tile_size))
2047
# TODO visualize tile bounds to see if they overlap
2148
width, height = n_tiles
2249
total_tiles = width * height - 1
2350
bar = Progress(total_tiles, 1)
24-
2551
@info "Utilizing $(Threads.nthreads()) threads"
26-
mempools = [MemoryPool(round(Int, 2*16384)) for _ in 1:Threads.maxthreadid()]
52+
mempools = [MemoryPool(round(Int, 3*16384)) for _ in 1:Threads.maxthreadid()]
2753
film = get_film(i.camera)
54+
camera = i.camera
55+
filter_radius = film.filter.radius
56+
57+
_tile = Point2f(0f0)
58+
_tb_min = sample_bounds.p_min .+ _tile .* tile_size
59+
_tb_max = min.(_tb_min .+ (tile_size - 1), sample_bounds.p_max)
60+
_tile_bounds = Bounds2(_tb_min, _tb_max)
61+
filmtiles = [FilmTile(film, _tile_bounds, filter_radius) for _ in 1:Threads.maxthreadid()]
2862
Threads.@threads for k in 0:total_tiles
2963
x, y = k % width, k ÷ width
3064
tile = Point2f(x, y)
31-
t_sampler = deepcopy(i.sampler)
32-
3365
tb_min = sample_bounds.p_min .+ tile .* tile_size
3466
tb_max = min.(tb_min .+ (tile_size - 1), sample_bounds.p_max)
35-
tile_bounds = Bounds2(tb_min, tb_max)
36-
37-
film_tile = FilmTile(film, tile_bounds)
38-
spp_sqr = 1f0 / Float32(t_sampler.samples_per_pixel)
39-
pool = mempools[Threads.threadid()]
40-
for pixel in tile_bounds
41-
start_pixel!(t_sampler, pixel)
42-
while has_next_sample(t_sampler)
43-
free_all(pool) # clear memory pool
44-
camera_sample = get_camera_sample(t_sampler, pixel)
45-
ray, ω = generate_ray_differential(pool, i.camera, camera_sample)
46-
scale_differentials!(ray, spp_sqr)
47-
l = RGBSpectrum(0f0)
48-
ω > 0.0f0 && (l = li(pool, i, ray, scene, 1))
49-
# TODO check l for invalid values
50-
isnan(l) && (l = RGBSpectrum(0f0))
51-
add_sample!(film, film_tile, camera_sample.film, l, ω)
52-
start_next_sample!(t_sampler)
53-
end
67+
if tb_min[1] < tb_max[1] && tb_min[2] < tb_max[2]
68+
tile_bounds = Bounds2(tb_min, tb_max)
69+
film_tile = filmtiles[Threads.threadid()]
70+
film_tile = update_bounds!(film, film_tile, tile_bounds)
71+
sample_kernel(mempools, i, camera, scene, film, film_tile, tile_bounds)
5472
end
55-
merge_film_tile!(get_film(i.camera), film_tile)
5673
next!(bar)
5774
end
58-
save(get_film(i.camera))
75+
save(film)
5976
end
6077

6178
function li(

src/lights/spot.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ end
4444
end
4545

4646
function sample_le(
47-
pool, s::SpotLight, u1::Point2f, ::Point2f, ::Float32,
48-
)::Tuple{RGBSpectrum,Ray,Normal3f,Float32,Float32}
47+
pool, s::SpotLight, u1::Point2f, ::Point2f, ::Float32,
48+
)::Tuple{RGBSpectrum,Ray,Normal3f,Float32,Float32}
49+
4950
w = s.light_to_world(uniform_sample_cone(u1, s.cos_total_width))
5051
ray = default(pool, Ray; o=s.position, d=w)
5152
light_normal = Normal3f(ray.d)

src/materials/uber-material.jl

Lines changed: 25 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -19,32 +19,36 @@ struct TrowbridgeReitzDistribution <: MicrofacetDistribution
1919
end
2020

2121

22-
abstract type Fresnel end
23-
struct FresnelConductor{S<:Spectrum} <: Fresnel
24-
ηi::S
25-
ηt::S
26-
k::S
22+
const FRESNEL_CONDUCTOR = UInt8(1)
23+
const FRESNEL_DIELECTRIC = UInt8(2)
24+
const FRESNEL_NO_OP = UInt8(3)
25+
26+
struct Fresnel
27+
ηi::RGBSpectrum
28+
ηt::RGBSpectrum
29+
k::RGBSpectrum
30+
type::UInt8
2731
end
28-
FresnelConductor() = FresnelConductor(RGBSpectrum(0f0), RGBSpectrum(0f0), RGBSpectrum(0f0))
29-
struct FresnelDielectric <: Fresnel
30-
ηi::Float32
31-
ηt::Float32
32+
33+
FresnelConductor(ni, nt, k) = Fresnel(ni, nt, k, FRESNEL_CONDUCTOR)
34+
FresnelDielectric(ni::Float32, nt::Float32) = Fresnel(RGBSpectrum(ni), RGBSpectrum(nt), RGBSpectrum(0.0f0), FRESNEL_DIELECTRIC)
35+
FresnelNoOp() = Fresnel(RGBSpectrum(0.0f0), RGBSpectrum(0.0f0), RGBSpectrum(0.0f0), FRESNEL_NO_OP)
36+
37+
function (f::Fresnel)(cos_θi::Float32)
38+
if f.type === FRESNEL_CONDUCTOR
39+
return fresnel_conductor(cos_θi, f.ηi, f.ηt, f.k)
40+
elseif f.type === FRESNEL_DIELECTRIC
41+
return fresnel_dielectric(cos_θi, f.ηi[1], f.ηt[1])
42+
end
43+
return RGBSpectrum(1.0f0)
3244
end
33-
FresnelDielectric() = FresnelDielectric(0f0, 0f0)
34-
struct FresnelNoOp <: Fresnel end
35-
(f::FresnelConductor)(cos_θi::Float32) = fresnel_conductor(cos_θi, f.ηi, f.ηt, f.k)
36-
(f::FresnelDielectric)(cos_θi::Float32) = fresnel_dielectric(cos_θi, f.ηi, f.ηt)
37-
(f::FresnelNoOp)(::Float32) = RGBSpectrum(1.0f0)
45+
3846

3947
struct UberBxDF{S<:Spectrum}
4048
"""
4149
Describes fresnel properties.
4250
"""
43-
fresnel_con::FresnelConductor{S}
44-
fresnel_di::FresnelDielectric
45-
fresnel_no::FresnelNoOp
46-
which_fresnel::UInt8 # selected fresnel
47-
51+
fresnel::Fresnel
4852
"""
4953
Spectrum used to scale the reflected color.
5054
"""
@@ -82,34 +86,14 @@ function UberBxDF{S}(active::Bool, bxdf_type::UInt8;
8286
r=Trace.RGBSpectrum(1f0), t=Trace.RGBSpectrum(1f0),
8387
a=0f0, b=0f0, η_a=0f0, η_b=0f0,
8488
distribution=TrowbridgeReitzDistribution(),
85-
fresnel=nothing,
89+
fresnel=FresnelNoOp(),
8690
type=UInt8(0),
8791
transport=UInt8(0)
8892
) where {S<:Spectrum}
89-
used_fresnel = UInt8(0)
90-
fresnel_con = FresnelConductor()
91-
fresnel_di = FresnelDielectric()
92-
fresnel_no = FresnelNoOp()
93-
94-
if !isnothing(fresnel)
95-
if fresnel isa FresnelConductor
96-
fresnel_con = fresnel
97-
used_fresnel = UInt8(1)
98-
elseif fresnel isa FresnelDielectric
99-
fresnel_di = fresnel
100-
used_fresnel = UInt8(2)
101-
elseif fresnel isa FresnelNoOp
102-
fresnel_no = fresnel
103-
used_fresnel = UInt8(3)
104-
end
105-
end
10693
_distribution = distribution isa TrowbridgeReitzDistribution ? distribution : TrowbridgeReitzDistribution()
107-
return UberBxDF{S}(fresnel_con, fresnel_di, fresnel_no, used_fresnel, r, t, a, b, η_a, η_b, _distribution, transport, type, bxdf_type, active)
94+
return UberBxDF{S}(fresnel, r, t, a, b, η_a, η_b, _distribution, transport, type, bxdf_type, active)
10895
end
10996

110-
fresnel(f::UberBxDF)= getfield(f, Int(f.which_fresnel))
111-
112-
11397
@inline function sample_f(s::UberBxDF, wo::Vec3f, sample::Point2f)::Tuple{Vec3f,Float32,RGBSpectrum,UInt8}
11498
if s.bxdf_type === SPECULAR_REFLECTION
11599
return sample_specular_reflection(s, wo, sample)

src/reflection/microfacet.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ function distribution_microfacet_reflection(m::UberBxDF{S}, wo::Vec3f, wi::Vec3f
213213
(cosθi ≈ 0 || cosθo ≈ 0) && return S(0f0)
214214
wh ≈ Vec3f(0) && return S(0f0)
215215
wh = normalize(wh)
216-
f = fresnel(m)(wi ⋅ face_forward(wh, Vec3f(0, 0, 1)))
216+
f = m.fresnel(wi ⋅ face_forward(wh, Vec3f(0, 0, 1)))
217217
return m.r * D(m.distribution, wh) * G(m.distribution, wo, wi) *
218218
f / (4f0 * cosθi * cosθo)
219219
end

0 commit comments

Comments
 (0)