Skip to content

Commit 60ef870

Browse files
committed
better mapreduce
1 parent f41cf0e commit 60ef870

File tree

5 files changed

+54
-78
lines changed

5 files changed

+54
-78
lines changed

src/integrators/sampler.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ function (i::SamplerIntegrator)(scene::Scene, film)
5353
_tb_max = min.(_tb_min .+ (tile_size - 1), sample_bounds.p_max)
5454
_tile_bounds = Bounds2(_tb_min, _tb_max)
5555
filmtiles = [FilmTile(film, _tile_bounds, filter_radius) for _ in 1:Threads.maxthreadid()]
56-
Threads.@threads :greedy for k in 0:total_tiles
56+
Threads.@threads for k in 0:total_tiles
5757
x, y = k % width, k ÷ width
5858
tile = Point2f(x, y)
5959
tb_min = sample_bounds.p_min .+ tile .* tile_size

src/shapes/triangle_mesh.jl

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,11 +192,16 @@ end
192192
∂p∂u, ∂p∂v, δp_13, δp_23
193193
end
194194

195+
@inline function _all(f, x::StaticVector{3})
196+
f(x[1]) && f(x[2]) && f(x[3])
197+
end
198+
199+
195200
@inline function ∂n(
196201
t::Triangle, uv::AbstractVector{Point2f},
197202
)::Tuple{Normal3f,Normal3f}
198203
t_normals = normals(t)
199-
all(x-> all(isnan, x), t_normals) && return Normal3f(0), Normal3f(0)
204+
_all(x -> _all(isnan, x), t_normals) && return Normal3f(0), Normal3f(0)
200205
# Compute deltas for partial derivatives of normal.
201206
δuv_13, δuv_23 = uv[1] - uv[3], uv[2] - uv[3]
202207
δn_13, δn_23 = t_normals[1] - t_normals[3], t_normals[2] - t_normals[3]
@@ -209,12 +214,13 @@ end
209214
∂n∂u, ∂n∂v
210215
end
211216

217+
212218
@inline function _init_triangle_shading_geometry(
213219
t::Triangle, si::SurfaceInteraction,
214220
barycentric::Point3f, uv::AbstractVector{Point2f},
215221
)
216-
has_normals = all(x->all(isfinite, x), t.normals)
217-
has_tangents = all(x->all(isfinite, x), t.tangents)
222+
has_normals = _all(x -> _all(isfinite, x), t.normals)
223+
has_tangents = _all(x -> _all(isfinite, x), t.tangents)
218224
!has_normals && !has_tangents && return si
219225
# Initialize triangle shading geometry.
220226
# Compute shading normal, tangent & bitangent.

test/cuda-mwe.jl

Lines changed: 34 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
using GeometryBasics, LinearAlgebra, Trace, BenchmarkTools
22
using ImageShow
33
using Makie
4+
using KernelAbstractions
5+
import KernelAbstractions as KA
6+
using KernelAbstractions.Extras.LoopInfo: @unroll
7+
8+
# using AMDGPU
9+
# ArrayType = ROCArray
10+
using CUDA
11+
ArrayType = CuArray
12+
413
include("./../src/gpu-support.jl")
514

615
LowSphere(radius, contact=Point3f(0)) = Sphere(contact .+ Point3f(0, 0, radius), radius)
@@ -18,6 +27,7 @@ material_red = Trace.MatteMaterial(
1827
Trace.ConstantTexture(0.0f0),
1928
)
2029

30+
2131
begin
2232
s1 = tmesh(LowSphere(0.5f0), material_red)
2333
s2 = tmesh(LowSphere(0.3f0, Point3f(0.5, 0.5, 0)), material_red)
@@ -47,103 +57,59 @@ begin
4757
Trace.PointLight(Vec3f(0, 0, 2), Trace.RGBSpectrum(10.0f0)),
4858
Trace.PointLight(Vec3f(0, 3, 3), Trace.RGBSpectrum(25.0f0)),
4959
)
60+
scene = Trace.Scene([lights...], bvh)
5061
img = zeros(RGBf, res, res)
5162
end
5263

5364
@inline function get_camera_sample(p_raster::Point2)
54-
5565
p_film = p_raster .+ rand(Point2f)
5666
p_lens = rand(Point2f)
5767
Trace.CameraSample(p_film, p_lens, rand(Float32))
5868
end
5969

60-
using KernelAbstractions.Extras.LoopInfo: @unroll
61-
62-
function simple_shading(bvh, shape, ray, si, l, depth, max_depth, lights)
63-
core = si.core
64-
n = si.shading.n
65-
wo = core.wo
66-
# Compute scattering functions for surface interaction.
67-
si = Trace.compute_differentials(si, ray)
68-
mat = Trace.get_material(bvh, shape)
69-
if mat.type === Trace.NO_MATERIAL
70-
return l
71-
end
72-
bsdf = mat(si, false, Trace.Radiance)
73-
# Compute emitted light if ray hit an area light source.
74-
l += Trace.le(si, wo)
75-
# Add contribution of each light source.
76-
@unroll for light in lights
77-
sampled_li, wi, pdf, vt = Trace.sample_li(
78-
light, core, rand(Point2f),
79-
)
80-
(Trace.is_black(sampled_li) || pdf ≈ 0.0f0) && continue
81-
f = bsdf(wo, wi)
82-
if !Trace.is_black(f) && !Trace.intersect_p(bvh, Trace.spawn_ray(vt.p0, vt.p1))
83-
l += f * sampled_li * abs(wi ⋅ n) / pdf
84-
end
85-
end
86-
# if depth + 1 <= max_depth
87-
# # Trace rays for specular reflection & refraction.
88-
# l += specular_reflect(bsdf, i, ray, si, scene, depth)
89-
# l += specular_transmit(bsdf, i, ray, si, scene, depth)
90-
# end
91-
return l
92-
end
70+
# ray = Trace.Ray(o=Point3f(0.5, 0.5, 1.0), d=Vec3f(0.0, 0.0, -1.0))
71+
# l = Trace.RGBSpectrum(0.0f0)
72+
# open("test3.llvm", "w") do io
73+
# code_llvm(io, simple_shading, typeof.((bvh, bvh.primitives[1], Trace.RayDifferentials(ray), Trace.SurfaceInteraction(), l, 1, 1, lights)))
74+
# end
9375

94-
95-
@inline function trace_pixel(camera, bvh, xy, lights)
76+
@inline function trace_pixel(camera, scene, xy)
9677
pixel = Point2f(Tuple(xy))
9778
camera_sample = get_camera_sample(pixel)
9879
ray, ω = Trace.generate_ray_differential(camera, camera_sample)
99-
l = Trace.RGBSpectrum(0.0f0)
10080
if ω > 0.0f0
101-
hit, shape, si = Trace.intersect!(bvh, ray)
81+
hit, shape, si = Trace.intersect!(scene, ray)
10282
if hit
103-
l = simple_shading(bvh, shape, ray, si, l, 1, 8, lights)
83+
l = Trace.li(Trace.UniformSampler(8), 5, ray, scene, 1)
10484
end
10585
end
106-
return RGBf(l.c...)
86+
return l
10787
end
10888

109-
using KernelAbstractions
110-
import KernelAbstractions as KA
111-
112-
113-
@kernel function ka_trace_image!(img, camera, bvh, lights)
114-
idx = @index(Global, Linear)
115-
if checkbounds(Bool, img, idx)
116-
xy = Tuple(divrem(idx, size(img, 1)))
117-
@inbounds img[idx] = trace_pixel(camera, bvh, xy, lights)
89+
@kernel function ka_trace_image!(img, camera, scene)
90+
linear_idx = @index(Global, Linear)
91+
if checkbounds(Bool, img, linear_idx)
92+
x = ((linear_idx - 1) % size(img, 1)) + 1
93+
y = ((linear_idx - 1) ÷ size(img, 1)) + 1
94+
l = trace_pixel(camera, scene, (x, y))
95+
@inbounds img[linear_idx] = RGBf(l.c...)
11896
end
11997
end
12098

121-
function launch_trace_image_ir!(img, camera, bvh, lights)
99+
function launch_trace_image!(img, camera, scene)
122100
backend = KA.get_backend(img)
123101
kernel! = ka_trace_image!(backend)
124-
open("test2.ir", "w") do io
125-
CUDA.@device_code_llvm io begin
126-
kernel!(img, camera, bvh, lights, ndrange=size(img), workgroupsize=(16, 16))
127-
end
128-
end
129-
AMDGPU.synchronize(; stop_hostcalls=false)
130-
return img
131-
end
132-
function launch_trace_image!(img, camera, bvh, lights)
133-
backend = KA.get_backend(img)
134-
kernel! = ka_trace_image!(backend)
135-
kernel!(img, camera, bvh, lights, ndrange=size(img), workgroupsize=(16, 16))
102+
kernel!(img, camera, scene, ndrange=size(img), workgroupsize=(16, 16))
136103
KA.synchronize(backend)
137104
return img
138105
end
139-
# using AMDGPU
140-
# ArrayType = ROCArray
141-
using CUDA
142-
ArrayType = CuArray
106+
143107
preserve = []
144-
gpu_bvh = to_gpu(ArrayType, bvh; preserve=preserve);
108+
gpu_scene = to_gpu(ArrayType, scene; preserve=preserve);
145109
gpu_img = ArrayType(zeros(RGBf, res, res));
146110
# launch_trace_image!(img, cam, bvh, lights);
147111
# @btime launch_trace_image!(img, cam, bvh, lights);
148112
# @btime launch_trace_image!(gpu_img, cam, gpu_bvh, lights);
149-
launch_trace_image!(gpu_img, cam, gpu_bvh, lights);
113+
launch_trace_image!(gpu_img, cam, gpu_scene);
114+
# @btime (launch_trace_image!(img, cam, scene));
115+
# 234.530 ms (456 allocations: 154.26 KiB)

test/gpu-sampler.jl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ begin
5757
Trace.PointLight(Vec3f(0, 0, 2), Trace.RGBSpectrum(10.0f0)),
5858
Trace.PointLight(Vec3f(0, 3, 3), Trace.RGBSpectrum(25.0f0)),
5959
)
60+
scene = Trace.Scene([lights...], bvh)
6061
img = zeros(RGBf, res, res)
6162
end
6263

@@ -86,10 +87,12 @@ end
8687
end
8788

8889
@kernel function ka_trace_image!(img, camera, scene)
89-
xy = @index(Global, Cartesian)
90-
if checkbounds(Bool, img, xy)
91-
l = trace_pixel(camera, scene, xy)
92-
@inbounds img[xy] = RGBf(l.c...)
90+
linear_idx = @index(Global, Linear)
91+
if checkbounds(Bool, img, linear_idx)
92+
x = ((linear_idx - 1) % size(img, 1)) + 1
93+
y = ((linear_idx - 1) ÷ size(img, 1)) + 1
94+
l = trace_pixel(camera, scene, (x, y))
95+
@inbounds img[linear_idx] = RGBf(l.c...)
9396
end
9497
end
9598

@@ -108,4 +111,5 @@ gpu_img = ArrayType(zeros(RGBf, res, res));
108111
# @btime launch_trace_image!(img, cam, bvh, lights);
109112
# @btime launch_trace_image!(gpu_img, cam, gpu_bvh, lights);
110113
launch_trace_image!(gpu_img, cam, gpu_scene);
111-
launch_trace_image!(img, cam, scene)
114+
# @btime (launch_trace_image!(img, cam, scene));
115+
# 234.530 ms (456 allocations: 154.26 KiB)

test/gpu-threading-benchmarks.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ function trace_image!(img, camera, bvh)
155155
end
156156

157157
function threads_trace_image!(img, camera, bvh)
158-
Threads.@threads :static for xy in CartesianIndices(size(img))
158+
Threads.@threads for xy in CartesianIndices(size(img))
159159
@inbounds img[xy] = trace_pixel(camera, bvh, xy)
160160
end
161161
return img

0 commit comments

Comments
 (0)