Devsh-Graphics-Programming
diff --git a/‎examples_tests b/‎examples_tests
diff --git a/‎include/nbl/builtin/hlsl/bit.hlsl
Lines changed: 29 additions & 12 deletions b/‎include/nbl/builtin/hlsl/bit.hlsl
Lines changed: 29 additions & 12 deletions
diff --git a/‎include/nbl/builtin/hlsl/cpp_compat.hlsl
Lines changed: 3 additions & 1 deletion b/‎include/nbl/builtin/hlsl/cpp_compat.hlsl
Lines changed: 3 additions & 1 deletion
diff --git a/‎include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl
Lines changed: 1 addition & 1 deletion b/‎include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/nbl/builtin/hlsl/cpp_compat/vector.hlsl
Lines changed: 7 additions & 0 deletions b/‎include/nbl/builtin/hlsl/cpp_compat/vector.hlsl
Lines changed: 7 additions & 0 deletions
@@ -1,6 +1,7 @@
 #ifndef _NBL_BUILTIN_HLSL_BIT_INCLUDED_
 #define _NBL_BUILTIN_HLSL_BIT_INCLUDED_
 
+#include <nbl/builtin/hlsl/spirv_intrinsics/core.hlsl>
 #include <nbl/builtin/hlsl/cpp_compat.hlsl>
 
 #ifndef __HLSL_VERSION
@@ -12,6 +13,7 @@ namespace nbl::hlsl
 NBL_ALIAS_TEMPLATE_FUNCTION(std::rotl, rotl);
 NBL_ALIAS_TEMPLATE_FUNCTION(std::rotr, rotr);
 NBL_ALIAS_TEMPLATE_FUNCTION(std::countl_zero, countl_zero);
+NBL_ALIAS_TEMPLATE_FUNCTION(std::bit_cast, bit_cast);
 
 }
 #else
@@ -20,6 +22,13 @@ namespace nbl
 namespace hlsl
 {
 
+template<class T, class U>
+T bit_cast(U val) // reinterprets the bits of `val` as a value of type T by forwarding to spirv::bitcast<T, U>
+{
+    static_assert(sizeof(T) == sizeof(U)); // a bit cast keeps every bit, so source and destination must have the same size (same rule as std::bit_cast)
+    return spirv::bitcast<T, U>(val);
+}
+
 template<typename T, typename S>
 T rotl(T x, S s);
 template<typename T, typename S>
@@ -57,21 +66,29 @@ T rotr(T x, S s)
     }
 }
 
-template<typename T>
-uint16_t countl_zero(T n)
+namespace impl
 {
-    uint16_t result = 0u;
-    for(int32_t bits_log2=6; bits_log2>=0; bits_log2--)
-    {
-        const uint16_t shift = bits_log2 ? uint16_t(1)<<(bits_log2-1) : 0;
-        const uint64_t loMask = bits_log2 ? (1ull<<shift)-1 : 0;
-        const bool chooseHigh = n&(loMask<<shift);
-        n = uint16_t((chooseHigh ? (n>shift):n)&loMask);
+template<uint16_t bits>
+uint16_t clz(uint64_t N) // counts the leading zeros of the low `bits` bits of N by recursive halving; returns `bits` when those bits are all zero
+{
+    static const uint64_t SHIFT   = bits>>1; // width of each half of the current window
+    static const uint64_t LO_MASK = (1ull<<SHIFT)-1; // mask with the SHIFT low bits set
+    const bool CHOOSE_HIGH = N & (LO_MASK<<SHIFT); // true when the upper half contains at least one set bit
+    const uint64_t NEXT = (CHOOSE_HIGH ? (N>>SHIFT):N)&LO_MASK; // keep the upper half if it is non-zero, otherwise the lower half
+    const uint16_t value = uint16_t(clz<SHIFT>(NEXT) + (CHOOSE_HIGH ? 0:SHIFT)); // an all-zero upper half contributes SHIFT leading zeros on top of the recursive count
+    return value;
+}
 
-        result += uint16_t(chooseHigh ? 0ull : shift);
-    }
 
-    return result;
+template<>
+uint16_t clz<1>(uint64_t N) { return uint16_t(1u-(N&1)); } // recursion base case: a single bit has one leading zero when clear and none when set; parentheses make the intended grouping explicit
+
+}
+
+template<typename T>
+uint16_t countl_zero(T n) // returns the number of leading zero bits of n, counted over the bit width of T
+{
+    return impl::clz<sizeof(T)*8>(n); // sizeof(T)*8 is the width of T in bits and selects the clz instantiation
+}
 }
 
 }
 
@@ -1,12 +1,15 @@
 #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_INCLUDED_
 #define _NBL_BUILTIN_HLSL_CPP_COMPAT_INCLUDED_
 
+#include <nbl/builtin/hlsl/macros.h>
 
 #ifndef __HLSL_VERSION
 #include <type_traits>
+#include <bit>
 
 #define ARROW ->
 #define NBL_CONSTEXPR constexpr
+#define NBL_CONSTEXPR_STATIC constexpr static
 #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline
 
 #define NBL_ALIAS_TEMPLATE_FUNCTION(origFunctionName, functionAlias) \
@@ -33,7 +36,6 @@ using add_pointer = std::add_pointer<T>;
 // it includes vector and matrix
 #include <nbl/builtin/hlsl/cpp_compat/intrinsics.h>
 
-
 #else
 
 #define ARROW .arrow().
 
@@ -74,7 +74,7 @@ NBL_TYPEDEF_MATRICES_FOR_SCALAR(int64_t);
 NBL_TYPEDEF_MATRICES_FOR_SCALAR(uint16_t);
 NBL_TYPEDEF_MATRICES_FOR_SCALAR(uint32_t);
 NBL_TYPEDEF_MATRICES_FOR_SCALAR(uint64_t);
-// TODO: halfMxN with std::float16_t
+NBL_TYPEDEF_MATRICES_FOR_SCALAR(float16_t);
 NBL_TYPEDEF_MATRICES_FOR_SCALAR(float32_t);
 NBL_TYPEDEF_MATRICES_FOR_SCALAR(float64_t);
 
 
@@ -7,6 +7,7 @@
 #include <glm/glm.hpp>
 #include <glm/detail/_swizzle.hpp>
 #include <stdint.h>
+#include <openexr/IlmBase/Half/half.h>
 
 namespace nbl::hlsl
 {
@@ -32,6 +33,12 @@ using uint32_t1 = vector<uint32_t, 1>;
 
 // TODO: halfN -> needs class implementation or C++23 std:float16_t
 
+using float16_t = half; // NOTE(review): `half` is presumably the type provided by the OpenEXR/IlmBase half.h include — confirm
+using float16_t4 = vector<float16_t, 4>;
+using float16_t3 = vector<float16_t, 3>;
+using float16_t2 = vector<float16_t, 2>;
+using float16_t1 = vector<float16_t, 1>;
+
 using float32_t = float;
 using float32_t4 = vector<float32_t, 4>;
 using float32_t3 = vector<float32_t, 3>;