@@ -290,8 +290,7 @@ bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(fl
290
290
#ifdef __HLSL_VERSION
291
291
NBL_CONSTEXPR float NEXT_ULP_AFTER_UNITY = asfloat (0x3f800001u);
292
292
#else
293
- uint32_t val = 0x3f800001u;
294
- float32_t NEXT_ULP_AFTER_UNITY = reinterpret_cast<float32_t &>( val );
293
+ NBL_CONSTEXPR float32_t NEXT_ULP_AFTER_UNITY = bit_cast<float32_t>(0x3f800001u);
295
294
#endif
296
295
const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY;
297
296
@@ -340,61 +339,6 @@ float32_t4 conditionalAbsOrMax<float32_t4>(bool cond, float32_t4 x, float32_t4 l
340
339
}
341
340
#endif
342
341
343
- namespace impl
344
- {
345
- struct bitFields // need template?
346
- {
347
- using this_t = bitFields;
348
-
349
- static this_t create (uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
350
- {
351
- this_t retval;
352
- retval.base = base;
353
- retval.value = value;
354
- retval.offset = offset;
355
- retval.count = count;
356
- return retval;
357
- }
358
-
359
- uint32_t __insert ()
360
- {
361
- const uint32_t shifted_masked_value = (value & ((0x1u << count) - 1u)) << offset;
362
- const uint32_t lo = base & ((0x1u << offset) - 1u);
363
- const uint32_t hi = base ^ lo;
364
- return (hi << count) | shifted_masked_value | lo;
365
- }
366
-
367
- uint32_t __overwrite ()
368
- {
369
- #ifdef __HLSL_VERSION
370
- return spirv::bitFieldInsert<uint32_t>(base, value, offset, count);
371
- #else
372
- // TODO: double check implementation
373
- const uint32_t shifted_masked_value = ~(0xffffffffu << count) << offset;
374
- base &= ~shifted_masked_value;
375
- return base | (value << offset);
376
- #endif
377
- }
378
-
379
- uint32_t base;
380
- uint32_t value;
381
- uint32_t offset;
382
- uint32_t count;
383
- };
384
- }
385
-
386
- uint32_t bitFieldOverwrite (uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
387
- {
388
- impl::bitFields b = impl::bitFields::create (base, value, offset, count);
389
- return b.__overwrite ();
390
- }
391
-
392
- uint32_t bitFieldInsert (uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
393
- {
394
- impl::bitFields b = impl::bitFields::create (base, value, offset, count);
395
- return b.__insert ();
396
- }
397
-
398
342
namespace impl
399
343
{
400
344
struct trigonometry
@@ -497,80 +441,10 @@ float getSumofArccosABCD(float cosA, float cosB, float cosC, float cosD)
497
441
return acos<float >(trig.tmp4) + trig.tmp5;
498
442
}
499
443
500
- namespace impl
501
- {
502
- template<typename T, uint16_t M, uint16_t N, uint16_t P>
503
- struct applyChainRule4D
504
- {
505
- static matrix <T, P, M> __call (matrix <T, N, M> dFdG, matrix <T, P, N> dGdR)
506
- {
507
- #ifdef __HLSL_VERSION
508
- return mul (dFdG, dGdR);
509
- #else
510
- return dFdG * dGdR; // glm
511
- #endif
512
- }
513
- };
514
-
515
- template<typename T, uint16_t M, uint16_t N>
516
- struct applyChainRule3D : applyChainRule4D<T,M,N,1 >
517
- {
518
- static vector <T, N> __call (matrix <T, N, M> dFdG, vector <T, N> dGdR)
519
- {
520
- #ifdef __HLSL_VERSION
521
- return mul (dFdG, dGdR);
522
- #else
523
- return dFdG * dGdR; // glm
524
- #endif
525
- }
526
- };
527
-
528
- template<typename T, uint16_t M>
529
- struct applyChainRule2D : applyChainRule4D<T,M,1 ,1 >
530
- {
531
- static vector <T, M> __call (vector <T, M> dFdG, T dGdR)
532
- {
533
- #ifdef __HLSL_VERSION
534
- return mul (dFdG, dGdR);
535
- #else
536
- return dFdG * dGdR; // glm
537
- #endif
538
- }
539
- };
540
-
541
- template<typename T>
542
- struct applyChainRule1D : applyChainRule4D<T,1 ,1 ,1 >
543
- {
544
- static T __call (T dFdG, T dGdR)
545
- {
546
- return dFdG * dGdR;
547
- }
548
- };
549
- }
550
-
551
- // possible to derive M,N,P automatically?
552
- template<typename T, uint16_t M, uint16_t N, uint16_t P NBL_FUNC_REQUIRES (is_scalar_v<T> && M>1 && N>1 && P>1 )
553
- matrix <T, P, M> applyChainRule (matrix <T, N, M> dFdG, matrix <T, P, N> dGdR)
554
- {
555
- return impl::applyChainRule4D<T,M,N,P>::__call (dFdG, dGdR);
556
- }
557
-
558
- template<typename T, uint16_t M, uint16_t N NBL_FUNC_REQUIRES (is_scalar_v<T> && M>1 && N>1 )
559
- vector <T, N> applyChainRule (matrix <T, N, M> dFdG, vector <T, N> dGdR)
560
- {
561
- return impl::applyChainRule3D<T,M,N>::__call (dFdG, dGdR);
562
- }
563
-
564
- template<typename T, uint16_t M NBL_FUNC_REQUIRES (is_scalar_v<T> && M>1 )
565
- vector <T, M> applyChainRule (vector <T, M> dFdG, T dGdR)
566
- {
567
- return impl::applyChainRule2D<T,M>::__call (dFdG, dGdR);
568
- }
569
-
570
- template<typename T NBL_FUNC_REQUIRES (is_scalar_v<T>)
571
- T applyChainRule (T dFdG, T dGdR)
+ // Chain rule for Jacobians: composes dF/dG with dG/dR into dF/dR via a matrix product.
+ // dFdG is N rows by M columns and dGdR is M rows by P columns (M is the shared inner
+ // dimension), so the product - and therefore the return type - is N rows by P columns.
+ // T is constrained to scalar types by the requires clause; M, N and P are deduced from the arguments.
444
+ template<typename T, uint16_t M, uint16_t N, uint16_t P NBL_FUNC_REQUIRES (is_scalar_v<T>)
445
+ matrix <T,N,P> applyChainRule (matrix <T,N,M> dFdG, matrix <T,M,P> dGdR)
572
446
{
573
- return impl::applyChainRule1D<T>:: __call (dFdG, dGdR);
447
+ return mul (dFdG,dGdR);
574
448
}
575
449
576
450
}
0 commit comments