Skip to content

Commit 387d2a6

Browse files
committed
fixes for OpenCL
1 parent b7c6cf0 commit 387d2a6

File tree

3 files changed

+10
-10
lines changed

3 files changed

+10
-10
lines changed

src/integrators/volpath/volpath.jl

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -107,8 +107,8 @@ Clear the integrator's internal state (RGB and weight accumulators) for restarti
107107
"""
108108
function Hikari.clear!(vp::VolPath)
109109
if vp.state !== nothing
110-
KernelAbstractions.fill!(vp.state.pixel_rgb, 0.0) # Float64
111-
KernelAbstractions.fill!(vp.state.pixel_weight_sum, 0.0) # Float64
110+
KernelAbstractions.fill!(vp.state.pixel_rgb, zero(eltype(vp.state.pixel_rgb)))
111+
KernelAbstractions.fill!(vp.state.pixel_weight_sum, zero(eltype(vp.state.pixel_weight_sum)))
112112
end
113113
end
114114

@@ -405,11 +405,11 @@ Following pbrt-v4's RGBFilm::GetPixelRGB:
405405
rgb_base = (pixel_idx - Int32(1)) * Int32(3)
406406
weight_sum = pixel_weight_sum[pixel_idx]
407407

408-
# Normalize by weight sum (pbrt-v4 style)
409-
# Note: pixel_rgb and pixel_weight_sum are Float64 for accumulation precision,
410-
# but we convert to Float32 for the final framebuffer output
411-
if weight_sum > 0.0
412-
inv_weight = 1.0 / weight_sum
408+
# Normalize by weight sum (pbrt-v4 style).
409+
# Use zero/one relative to weight_sum's type so the kernel stays in Float32
410+
# when accumulation_eltype=Float32 (the GPU-compatible default).
411+
if weight_sum > zero(weight_sum)
412+
inv_weight = one(weight_sum) / weight_sum
413413
r = Float32(pixel_rgb[rgb_base + Int32(1)] * inv_weight)
414414
g = Float32(pixel_rgb[rgb_base + Int32(2)] * inv_weight)
415415
b = Float32(pixel_rgb[rgb_base + Int32(3)] * inv_weight)

src/lights/light-sampler.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -316,15 +316,15 @@ The flat index counts across all typed arrays in order.
316316
push!(branches, quote
317317
if flat_idx <= $cumsum_expr
318318
vec_idx = UInt32(flat_idx - $prev_cumsum)
319-
return SetKey(UInt8($i), vec_idx)
319+
return SetKey(UInt32($i), vec_idx)
320320
end
321321
end)
322322
end
323323

324324
quote
325325
$(branches...)
326326
# Fallback - return last valid index
327-
return SetKey(UInt8($N), UInt32(length(lights.data[$N])))
327+
return SetKey(UInt32($N), UInt32(length(lights.data[$N])))
328328
end
329329
end
330330

test/gpu_compat.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -222,7 +222,7 @@ function gen_gpu_test_scene()
222222
lights=adapted.lights,
223223
rgb2spec_table=rgb2spec_table,
224224
lambda=Hikari.SampledWavelengths{4}((400f0, 500f0, 600f0, 700f0), (1f0, 1f0, 1f0, 1f0)),
225-
mat_idx=Hikari.SetKey(UInt8(1), UInt32(1)),
225+
mat_idx=Hikari.SetKey(UInt32(1), UInt32(1)),
226226
ray=Raycore.Ray(o=Point3f(0,0,0), d=Vec3f(0,0,1), t_max=Inf32),
227227
)
228228
end

0 commit comments

Comments
 (0)