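Fixes for OpenCL: replace hard-coded Float64 literals with element-type-generic zero/one, and use UInt32 instead of UInt8 for the SetKey type index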

SimonDanisch · SimonDanisch · commit 387d2a6040d5 · 2026-02-22T21:35:54.000+01:00
diff --git a/src/integrators/volpath/volpath.jl b/src/integrators/volpath/volpath.jl
@@ -107,8 +107,8 @@ Clear the integrator's internal state (RGB and weight accumulators) for restarti
 """
 function Hikari.clear!(vp::VolPath)
     if vp.state !== nothing
-        KernelAbstractions.fill!(vp.state.pixel_rgb, 0.0)  # Float64
-        KernelAbstractions.fill!(vp.state.pixel_weight_sum, 0.0)  # Float64
+        KernelAbstractions.fill!(vp.state.pixel_rgb, zero(eltype(vp.state.pixel_rgb)))
+        KernelAbstractions.fill!(vp.state.pixel_weight_sum, zero(eltype(vp.state.pixel_weight_sum)))
     end
 end
 
@@ -405,11 +405,11 @@ Following pbrt-v4's RGBFilm::GetPixelRGB:
         rgb_base = (pixel_idx - Int32(1)) * Int32(3)
         weight_sum = pixel_weight_sum[pixel_idx]
 
-        # Normalize by weight sum (pbrt-v4 style)
-        # Note: pixel_rgb and pixel_weight_sum are Float64 for accumulation precision,
-        # but we convert to Float32 for the final framebuffer output
-        if weight_sum > 0.0
-            inv_weight = 1.0 / weight_sum
+        # Normalize by weight sum (pbrt-v4 style).
+        # Use zero/one relative to weight_sum's type so the kernel stays in Float32
+        # when accumulation_eltype=Float32 (the GPU-compatible default).
+        if weight_sum > zero(weight_sum)
+            inv_weight = one(weight_sum) / weight_sum
             r = Float32(pixel_rgb[rgb_base + Int32(1)] * inv_weight)
             g = Float32(pixel_rgb[rgb_base + Int32(2)] * inv_weight)
             b = Float32(pixel_rgb[rgb_base + Int32(3)] * inv_weight)
diff --git a/src/lights/light-sampler.jl b/src/lights/light-sampler.jl
@@ -316,15 +316,15 @@ The flat index counts across all typed arrays in order.
         push!(branches, quote
             if flat_idx <= $cumsum_expr
                 vec_idx = UInt32(flat_idx - $prev_cumsum)
-                return SetKey(UInt8($i), vec_idx)
+                return SetKey(UInt32($i), vec_idx)
             end
         end)
     end
 
     quote
         $(branches...)
         # Fallback - return last valid index
-        return SetKey(UInt8($N), UInt32(length(lights.data[$N])))
+        return SetKey(UInt32($N), UInt32(length(lights.data[$N])))
     end
 end
 
diff --git a/test/gpu_compat.jl b/test/gpu_compat.jl
@@ -222,7 +222,7 @@ function gen_gpu_test_scene()
         lights=adapted.lights,
         rgb2spec_table=rgb2spec_table,
         lambda=Hikari.SampledWavelengths{4}((400f0, 500f0, 600f0, 700f0), (1f0, 1f0, 1f0, 1f0)),
-        mat_idx=Hikari.SetKey(UInt8(1), UInt32(1)),
+        mat_idx=Hikari.SetKey(UInt32(1), UInt32(1)),
         ray=Raycore.Ray(o=Point3f(0,0,0), d=Vec3f(0,0,1), t_max=Inf32),
     )
 end

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -222,7 +222,7 @@ function gen_gpu_test_scene()` |
| `222` | `222` | `lights=adapted.lights,` |
| `223` | `223` | `rgb2spec_table=rgb2spec_table,` |
| `224` | `224` | `lambda=Hikari.SampledWavelengths{4}((400f0, 500f0, 600f0, 700f0), (1f0, 1f0, 1f0, 1f0)),` |
| `225` | | `- mat_idx=Hikari.SetKey(UInt8(1), UInt32(1)),` |
| | `225` | `+ mat_idx=Hikari.SetKey(UInt32(1), UInt32(1)),` |
| `226` | `226` | `ray=Raycore.Ray(o=Point3f(0,0,0), d=Vec3f(0,0,1), t_max=Inf32),` |
| `227` | `227` | `)` |
| `228` | `228` | `end` |