Skip to content

Commit 235fb0a

Browse files
authored
Merge pull request #143 from beasterio/baikal-next
ApiCL tests fix
2 parents b32b636 + cf11c6a commit 235fb0a

File tree

8 files changed

+49
-20
lines changed

8 files changed

+49
-20
lines changed

Calc/src/device_clw.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,9 @@ namespace Calc
459459
""
460460
#endif
461461
);
462+
#ifdef USE_SAFE_MATH
463+
buildopts.append("-D USE_SAFE_MATH");
464+
#endif
462465

463466
return new ExecutableClw(
464467
CLWProgram::CreateFromFile(filename, headernames, numheaders, buildopts.c_str(), m_context)

RadeonRays/src/kernels/CL/common.cl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,24 @@ float fast_intersect_triangle(ray r, float3 v1, float3 v2, float3 v3, float t_ma
190190
}
191191
}
192192

193+
// Return the per-component reciprocal of the ray direction r.d.xyz.
//
// When USE_SAFE_MATH is defined, each direction component whose magnitude is
// not greater than 2^-80 is replaced by 2^-80 carrying that component's sign
// (via copysign) before the division, so the denominator is never zero and the
// result magnitude is at most 2^80. Components with larger magnitude are
// divided as-is.
//
// When USE_SAFE_MATH is not defined, native_recip is applied to r.d.xyz
// directly, with no guarding of small or zero components.
INLINE
194+
float3 safe_invdir(ray r)
195+
{
196+
#ifdef USE_SAFE_MATH
197+
float const dirx = r.d.x;
198+
float const diry = r.d.y;
199+
float const dirz = r.d.z;
200+
float const ooeps = exp2(-80.0f); // Avoid div by zero: 2^-80 is the smallest denominator magnitude used below.
201+
float3 invdir;
202+
invdir.x = 1.0f / (fabs(dirx) > ooeps ? dirx : copysign(ooeps, dirx));
203+
invdir.y = 1.0f / (fabs(diry) > ooeps ? diry : copysign(ooeps, diry));
204+
invdir.z = 1.0f / (fabs(dirz) > ooeps ? dirz : copysign(ooeps, dirz));
205+
return invdir;
206+
#else // !USE_SAFE_MATH: unguarded reciprocal
207+
return native_recip(r.d.xyz);
208+
#endif // USE_SAFE_MATH
209+
}
210+
193211
// Intersect rays vs bbox and return intersection span.
194212
// Intersection criteria is ret.x <= ret.y
195213
INLINE
@@ -200,7 +218,7 @@ float2 fast_intersect_bbox1(bbox box, float3 invdir, float3 oxinvdir, float t_ma
200218
float3 const tmax = max(f, n);
201219
float3 const tmin = min(f, n);
202220
float const t1 = min(min3(tmax.x, tmax.y, tmax.z), t_max);
203-
float const t0 = max(max3(tmin.x, tmin.y, tmin.z), 0.001f);
221+
float const t0 = max(max3(tmin.x, tmin.y, tmin.z), 0.f);
204222
return make_float2(t0, t1);
205223
}
206224

RadeonRays/src/kernels/CL/intersect_bvh2_bittrail.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ occluded_main(
162162
if (ray_is_active(&r))
163163
{
164164
// Precompute inverse direction and origin / dir for bbox testing
165-
float3 const invdir = native_recip(r.d.xyz);
165+
float3 const invdir = safe_invdir(r);
166166
float3 const oxinvdir = -r.o.xyz * invdir;
167167
// Intersection parametric distance
168168
float const t_max = r.o.w;
@@ -304,7 +304,7 @@ KERNEL void intersect_main(
304304
if (ray_is_active(&r))
305305
{
306306
// Precompute inverse direction and origin / dir for bbox testing
307-
float3 const invdir = native_recip(r.d.xyz);
307+
float3 const invdir = safe_invdir(r);
308308
float3 const oxinvdir = -r.o.xyz * invdir;
309309
// Intersection parametric distance
310310
float t_max = r.o.w;

RadeonRays/src/kernels/CL/intersect_bvh2_short_stack.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ occluded_main(
162162
__local int* lm_stack = lm_stack_base;
163163

164164
// Precompute inverse direction and origin / dir for bbox testing
165-
float3 const invdir = native_recip(r.d.xyz);
165+
float3 const invdir = safe_invdir(r);
166166
float3 const oxinvdir = -r.o.xyz * invdir;
167167
// Intersection parametric distance
168168
float const t_max = r.o.w;
@@ -313,7 +313,7 @@ KERNEL void intersect_main(
313313
__local int* lm_stack = lm_stack_base;
314314

315315
// Precompute inverse direction and origin / dir for bbox testing
316-
float3 const invdir = native_recip(r.d.xyz);
316+
float3 const invdir = safe_invdir(r);
317317
float3 const oxinvdir = -r.o.xyz * invdir;
318318
// Intersection parametric distance
319319
float t_max = r.o.w;

RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ void intersect_main(
121121
if (ray_is_active(&r))
122122
{
123123
// Precompute inverse direction and origin / dir for bbox testing
124-
float3 const invdir = native_recip(r.d.xyz);
124+
float3 const invdir = safe_invdir(r);
125125
float3 const oxinvdir = -r.o.xyz * invdir;
126126
// Intersection parametric distance
127127
float t_max = r.o.w;
@@ -225,7 +225,7 @@ void occluded_main(
225225
if (ray_is_active(&r))
226226
{
227227
// Precompute inverse direction and origin / dir for bbox testing
228-
float3 const invdir = native_recip(r.d.xyz);
228+
float3 const invdir = safe_invdir(r);
229229
float3 const oxinvdir = -r.o.xyz * invdir;
230230
// Intersection parametric distance
231231
float t_max = r.o.w;

RadeonRays/src/kernels/CL/intersect_bvh2level_skiplinks.cl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,15 @@ KERNEL void intersect_main(
164164
if (ray_is_active(&r))
165165
{
166166
// Precompute invdir for bbox testing
167-
float3 invdir = native_recip(r.d.xyz);
167+
float3 invdir = safe_invdir(r);
168168
float3 invdirtop = invdir;
169169
float t_max = r.o.w;
170170

171171
// We need to keep original ray around for returns from bottom hierarchy
172172
ray top_ray = r;
173-
174173
// Fetch top level BVH index
175174
int addr = root_idx;
175+
176176
// Set top index
177177
int top_addr = INVALID_IDX;
178178
// Current shape ID
@@ -181,11 +181,11 @@ KERNEL void intersect_main(
181181
int closest_shape_id = INVALID_IDX;
182182
int closest_prim_id = INVALID_IDX;
183183
float2 closest_barycentrics;
184-
185184
while (addr != INVALID_IDX)
186185
{
187186
// Fetch next node
188187
bvh_node node = nodes[addr];
188+
189189
// Intersect against bbox
190190
float2 s = fast_intersect_bbox1(node, invdir, -r.o.xyz * invdir, t_max);
191191

@@ -247,14 +247,14 @@ KERNEL void intersect_main(
247247

248248
r = transform_ray(r, wmi0, wmi1, wmi2, wmi3);
249249
// Recalc invdir
250-
invdir = native_recip(r.d.xyz);
250+
invdir = safe_invdir(r);
251251
// And continue traversal of the bottom level BVH
252252
continue;
253253
}
254-
/*else
254+
else
255255
{
256256
addr = INVALID_IDX;
257-
}*/
257+
}
258258
}
259259
}
260260
// Traverse child nodes otherwise.
@@ -334,7 +334,7 @@ KERNEL void occluded_main(
334334
if (ray_is_active(&r))
335335
{
336336
// Precompute invdir for bbox testing
337-
float3 invdir = native_recip(r.d.xyz);
337+
float3 invdir = safe_invdir(r);
338338
float3 invdirtop = invdir;
339339
float const t_max = r.o.w;
340340

@@ -405,7 +405,7 @@ KERNEL void occluded_main(
405405

406406
r = transform_ray(r, wmi0, wmi1, wmi2, wmi3);
407407
// Recalc invdir
408-
invdir = native_recip(r.d.xyz);
408+
invdir = safe_invdir(r);;
409409
// And continue traversal of the bottom level BVH
410410
continue;
411411
}

RadeonRays/src/kernels/CL/intersect_hlbvh_stack.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ occluded_main(
117117
__local int* lm_stack = lm_stack_base;
118118

119119
// Precompute inverse direction and origin / dir for bbox testing
120-
float3 const invdir = native_recip(r.d.xyz);
120+
float3 const invdir = safe_invdir(r);
121121
float3 const oxinvdir = -r.o.xyz * invdir;
122122
// Intersection parametric distance
123123
float const t_max = r.o.w;
@@ -273,7 +273,7 @@ KERNEL void intersect_main(
273273
__local int* lm_stack = lm_stack_base;
274274

275275
// Precompute inverse direction and origin / dir for bbox testing
276-
float3 const invdir = native_recip(r.d.xyz);
276+
float3 const invdir = safe_invdir(r);
277277
float3 const oxinvdir = -r.o.xyz * invdir;
278278
// Intersection parametric distance
279279
float t_max = r.o.w;

premake5.lua

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ newoption {
8080
description = "Add tutorials projects"
8181
}
8282

83+
-- Declare the "safe_math" build option. Later in this script, a set
-- _OPTIONS["safe_math"] adds the USE_SAFE_MATH preprocessor define.
newoption {
84+
trigger = "safe_math",
85+
description = "use safe math"
86+
}
87+
8388
if not _OPTIONS["use_opencl"] and not _OPTIONS["use_vulkan"] and not _OPTIONS["use_embree"] then
8489
_OPTIONS["use_opencl"] = 1
8590
end
@@ -157,7 +162,7 @@ else
157162

158163
if( _OPTIONS["static_library"]) then
159164
defines{ "RR_STATIC_LIBRARY=1" }
160-
print ">> Building Radeon Rays as a static library";
165+
print ">> Building Radeon Rays as a static library";
161166
end
162167

163168
if _OPTIONS["use_opencl"] then
@@ -194,7 +199,7 @@ else
194199
defines{"USE_OPENCL=1"}
195200
end
196201
if _OPTIONS["use_vulkan"] then
197-
print ">> Vulkan backend enabled"
202+
print ">> Vulkan backend enabled"
198203
defines{"USE_VULKAN=1"}
199204
vulkanPath = ""
200205
vulkanSDKPath = os.getenv( "VK_SDK_PATH" );
@@ -240,6 +245,10 @@ else
240245
end
241246
end
242247

248+
if _OPTIONS["safe_math"] then
249+
defines { "USE_SAFE_MATH" }
250+
end
251+
243252
if fileExists("./RadeonRays/RadeonRays.lua") then
244253
dofile("./RadeonRays/RadeonRays.lua")
245254
end
@@ -286,5 +295,4 @@ else
286295
dofile("./Tutorials/Tutorials.lua")
287296
end
288297
end
289-
290298
end

0 commit comments

Comments
 (0)