Fix normal quantization cache

kevyuu · kevyuu · commit e0013cbedccf · 2025-07-22T06:47:16.000+07:00
diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h
@@ -43,23 +43,23 @@ class CDirQuantCacheBase
 				
 				Vector8u3() : x(0u),y(0u),z(0u) {}
 				Vector8u3(const Vector8u3&) = default;
-				explicit Vector8u3(const hlsl::float32_t3& val)
+				explicit Vector8u3(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector8u3& operator=(const Vector8u3&) = default;
-				Vector8u3& operator=(const hlsl::float32_t3& val)
+				Vector8u3& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
 					z = val.z;
 					return *this;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const
 				{
-					return { x, y, z };
+					return { x, y, z, 0 };
 				}
 
 
@@ -75,24 +75,24 @@ class CDirQuantCacheBase
 				
 				Vector8u4() : x(0u),y(0u),z(0u),w(0u) {}
 				Vector8u4(const Vector8u4&) = default;
-				explicit Vector8u4(const hlsl::float32_t3& val)
+				explicit Vector8u4(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector8u4& operator=(const Vector8u4&) = default;
-				Vector8u4& operator=(const hlsl::float32_t3& val)
+				Vector8u4& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
 					z = val.z;
-					w = 0;
+					w = val.w;
 					return *this;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const
 				{
-					return { x, y, z };
+					return { x, y, z, w };
 				}
 				
 			private:
@@ -109,17 +109,16 @@ class CDirQuantCacheBase
 
 				Vector1010102() : storage(0u) {}
 				Vector1010102(const Vector1010102&) = default;
-				explicit Vector1010102(const hlsl::float32_t3& val)
+				explicit Vector1010102(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector1010102& operator=(const Vector1010102&) = default;
-				Vector1010102& operator=(const hlsl::float32_t3& val)
+				Vector1010102& operator=(const hlsl::uint32_t4& val)
 				{
 					constexpr auto storageBits = quantizationBits + 1u;
-					hlsl::uint32_t3 u32_val = { val.x, val.y, val.z };
-					storage = u32_val.x | (u32_val.y << storageBits) | (u32_val.z << (storageBits * 2u));
+					storage = val.x | (val.y << storageBits) | (val.z << (storageBits * 2u));
 					return *this;
 				}
 
@@ -132,11 +131,11 @@ class CDirQuantCacheBase
 					return storage==other.storage;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const
 				{
 					constexpr auto storageBits = quantizationBits + 1u;
 					const auto mask = (0x1u << storageBits) - 1u;
-					return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask};
+					return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask, 0};
 				}
 
 			private:
@@ -151,23 +150,23 @@ class CDirQuantCacheBase
 				
 				Vector16u3() : x(0u),y(0u),z(0u) {}
 				Vector16u3(const Vector16u3&) = default;
-				explicit Vector16u3(const hlsl::float32_t3& val)
+				explicit Vector16u3(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector16u3& operator=(const Vector16u3&) = default;
-				Vector16u3& operator=(const hlsl::float32_t3& val)
+				Vector16u3& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
 					z = val.z;
 					return *this;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const
 				{
-					return { x, y, z };
+					return { x, y, z, 0 };
 				}
 
 			private:
@@ -182,24 +181,24 @@ class CDirQuantCacheBase
 
 				Vector16u4() : x(0u),y(0u),z(0u),w(0u) {}
 				Vector16u4(const Vector16u4&) = default;
-				explicit Vector16u4(const hlsl::float32_t3& val)
+				explicit Vector16u4(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector16u4& operator=(const Vector16u4&) = default;
-				Vector16u4& operator=(const hlsl::float32_t3& val)
+				Vector16u4& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
 					z = val.z;
-					w = 0;
+					w = val.w;
 					return *this;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const // unsigned integer lanes like every other Vector*::getValue(); quantize() applies ^ and & to the result
 				{
-					return { x, y, z };
+					return { x, y, z, w };
 				}
 
 			private:
@@ -379,11 +378,28 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 		std::tuple<cache_type_t<Formats>...> cache;
 		
 		template<uint32_t dimensions, E_FORMAT CacheFormat>
-		value_type_t<CacheFormat> quantize(const hlsl::float32_t3& value)
+		value_type_t<CacheFormat> quantize(const hlsl::vector<hlsl::float32_t, dimensions>& value)
 		{
-			const auto negativeMask = lessThan(value, hlsl::float32_t3(0.0f));
-
-			const hlsl::float32_t3 absValue = abs(value);
+			auto to_float32_t4 = [](hlsl::vector<hlsl::float32_t, dimensions> src) -> hlsl::float32_t4
+      {
+        if constexpr(dimensions == 1)
+        {
+          return {src.x, 0, 0, 0};
+        } else if constexpr (dimensions == 2)
+        {
+          return {src.x, src.y, 0, 0};
+        } else if constexpr (dimensions == 3)
+        {
+          return {src.x, src.y, src.z, 0};
+        } else if constexpr (dimensions == 4)
+        {
+          return {src.x, src.y, src.z, src.w};
+        }
+      };
+
+			const auto negativeMask = to_float32_t4(lessThan(value, hlsl::vector<hlsl::float32_t, dimensions>(0.0f)));
+
+			const hlsl::vector<hlsl::float32_t, dimensions> absValue = abs(value);
 			const auto key = Key(absValue);
 
 			constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -397,29 +413,42 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				{
 					const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
 
-					quantized = abs(fit);
+					const auto abs_fit = to_float32_t4(abs(fit));
+          quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w);
+
 					insertIntoCache<CacheFormat>(key,quantized);
 				}
 			}
 
-			//return quantized.
-			const auto negativeMulVec = hlsl::float32_t3(negativeMask.x ? -1 : 1, negativeMask.y ? -1 : 1, negativeMask.z ? -1 : 1);
-      return value_type_t<CacheFormat>(negativeMulVec * quantized.getValue());
+			auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
+      {
+					hlsl::uint32_t4 retval;
+					retval.x = mask.x ? val2.x : val1.x;
+					retval.y = mask.y ? val2.y : val1.y;
+					retval.z = mask.z ? val2.z : val1.z;
+					retval.w = mask.w ? val2.w : val1.w;
+					return retval;
+      };
+
+      const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u);
+      auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);
+      restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask);
+      return value_type_t<CacheFormat>(restoredAsVec & xorflag);
 		}
 
 		template<uint32_t dimensions, uint32_t quantizationBits>
-		static inline hlsl::float32_t3 findBestFit(const hlsl::float32_t3& value)
+		static inline hlsl::vector<hlsl::float32_t, dimensions> findBestFit(const hlsl::vector<hlsl::float32_t, dimensions>& value)
 		{
 			static_assert(dimensions>1u,"No point");
 			static_assert(dimensions<=4u,"High Dimensions are Hard!");
 
 			const auto vectorForDots = hlsl::normalize(value);
 
 			//
-			hlsl::float32_t3 fittingVector;
-			hlsl::float32_t3 floorOffset;
+			hlsl::vector<hlsl::float32_t, dimensions> fittingVector;
+			hlsl::vector<hlsl::float32_t, dimensions> floorOffset;
 			constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u;
-			hlsl::float32_t3 corners[cornerCount] = {};
+			hlsl::vector<hlsl::float32_t, dimensions> corners[cornerCount] = {};
 			{
 				uint32_t maxDirCompIndex = 0u;
 				for (auto i=1u; i<dimensions; i++)
@@ -431,7 +460,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				if (maxDirectionComp < std::sqrtf(0.9998f / float(dimensions)))
 				{
 					_NBL_DEBUG_BREAK_IF(true);
-					return hlsl::float32_t3(0.f);
+					return hlsl::vector<hlsl::float32_t, dimensions>(0.f);
 				}
 				fittingVector = value / maxDirectionComp;
 				floorOffset[maxDirCompIndex] = 0.499f;
@@ -453,9 +482,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				}
 			}
 
-			hlsl::float32_t3 bestFit;
+			hlsl::vector<hlsl::float32_t, dimensions> bestFit;
 			float closestTo1 = -1.f;
-			auto evaluateFit = [&](const hlsl::float32_t3& newFit) -> void
+			auto evaluateFit = [&](const hlsl::vector<hlsl::float32_t, dimensions>& newFit) -> void
 			{
 				auto newFitLen = length(newFit);
 				const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen);
@@ -467,7 +496,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 			};
 
 			constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u;
-			const hlsl::float32_t3 cubeHalfSizeND = hlsl::float32_t3(cubeHalfSize);
+			const hlsl::vector<hlsl::float32_t, dimensions> cubeHalfSizeND = hlsl::vector<hlsl::float32_t, dimensions>(cubeHalfSize);
 			for (uint32_t n=cubeHalfSize; n>0u; n--)
 			{
 				//we'd use float addition in the interest of speed, to increment the loop

Original file line number	Diff line number	Diff line change
`@@ -43,23 +43,23 @@ class CDirQuantCacheBase`
`43`	`43`
`44`	`44`	`Vector8u3() : x(0u),y(0u),z(0u) {}`
`45`	`45`	`Vector8u3(const Vector8u3&) = default;`
`46`		`- explicit Vector8u3(const hlsl::float32_t3& val)`
	`46`	`+ explicit Vector8u3(const hlsl::uint32_t4& val)`
`47`	`47`	`{`
`48`	`48`	`operator=(val);`
`49`	`49`	`}`
`50`	`50`
`51`	`51`	`Vector8u3& operator=(const Vector8u3&) = default;`
`52`		`- Vector8u3& operator=(const hlsl::float32_t3& val)`
	`52`	`+ Vector8u3& operator=(const hlsl::uint32_t4& val)`
`53`	`53`	`{`
`54`	`54`	`x = val.x;`
`55`	`55`	`y = val.y;`
`56`	`56`	`z = val.z;`
`57`	`57`	`return *this;`
`58`	`58`	`}`
`59`	`59`
`60`		`- hlsl::float32_t3 getValue() const`
	`60`	`+ hlsl::uint32_t4 getValue() const`
`61`	`61`	`{`
`62`		`- return { x, y, z };`
	`62`	`+ return { x, y, z, 0 };`
`63`	`63`	`}`
`64`	`64`
`65`	`65`
`@@ -75,24 +75,24 @@ class CDirQuantCacheBase`
`75`	`75`
`76`	`76`	`Vector8u4() : x(0u),y(0u),z(0u),w(0u) {}`
`77`	`77`	`Vector8u4(const Vector8u4&) = default;`
`78`		`- explicit Vector8u4(const hlsl::float32_t3& val)`
	`78`	`+ explicit Vector8u4(const hlsl::uint32_t4& val)`
`79`	`79`	`{`
`80`	`80`	`operator=(val);`
`81`	`81`	`}`
`82`	`82`
`83`	`83`	`Vector8u4& operator=(const Vector8u4&) = default;`
`84`		`- Vector8u4& operator=(const hlsl::float32_t3& val)`
	`84`	`+ Vector8u4& operator=(const hlsl::uint32_t4& val)`
`85`	`85`	`{`
`86`	`86`	`x = val.x;`
`87`	`87`	`y = val.y;`
`88`	`88`	`z = val.z;`
`89`		`- w = 0;`
	`89`	`+ w = val.w;`
`90`	`90`	`return *this;`
`91`	`91`	`}`
`92`	`92`
`93`		`- hlsl::float32_t3 getValue() const`
	`93`	`+ hlsl::uint32_t4 getValue() const`
`94`	`94`	`{`
`95`		`- return { x, y, z };`
	`95`	`+ return { x, y, z, w };`
`96`	`96`	`}`
`97`	`97`
`98`	`98`	`private:`
`@@ -109,17 +109,16 @@ class CDirQuantCacheBase`
`109`	`109`
`110`	`110`	`Vector1010102() : storage(0u) {}`
`111`	`111`	`Vector1010102(const Vector1010102&) = default;`
`112`		`- explicit Vector1010102(const hlsl::float32_t3& val)`
	`112`	`+ explicit Vector1010102(const hlsl::uint32_t4& val)`
`113`	`113`	`{`
`114`	`114`	`operator=(val);`
`115`	`115`	`}`
`116`	`116`
`117`	`117`	`Vector1010102& operator=(const Vector1010102&) = default;`
`118`		`- Vector1010102& operator=(const hlsl::float32_t3& val)`
	`118`	`+ Vector1010102& operator=(const hlsl::uint32_t4& val)`
`119`	`119`	`{`
`120`	`120`	`constexpr auto storageBits = quantizationBits + 1u;`
`121`		`- hlsl::uint32_t3 u32_val = { val.x, val.y, val.z };`
`122`		`- storage = u32_val.x \| (u32_val.y << storageBits) \| (u32_val.z << (storageBits * 2u));`
	`121`	`+ storage = val.x \| (val.y << storageBits) \| (val.z << (storageBits * 2u));`
`123`	`122`	`return *this;`
`124`	`123`	`}`
`125`	`124`
`@@ -132,11 +131,11 @@ class CDirQuantCacheBase`
`132`	`131`	`return storage==other.storage;`
`133`	`132`	`}`
`134`	`133`
`135`		`- hlsl::float32_t3 getValue() const`
	`134`	`+ hlsl::uint32_t4 getValue() const`
`136`	`135`	`{`
`137`	`136`	`constexpr auto storageBits = quantizationBits + 1u;`
`138`	`137`	`const auto mask = (0x1u << storageBits) - 1u;`
`139`		`- return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask};`
	`138`	`+ return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask, 0};`
`140`	`139`	`}`
`141`	`140`
`142`	`141`	`private:`
`@@ -151,23 +150,23 @@ class CDirQuantCacheBase`
`151`	`150`
`152`	`151`	`Vector16u3() : x(0u),y(0u),z(0u) {}`
`153`	`152`	`Vector16u3(const Vector16u3&) = default;`
`154`		`- explicit Vector16u3(const hlsl::float32_t3& val)`
	`153`	`+ explicit Vector16u3(const hlsl::uint32_t4& val)`
`155`	`154`	`{`
`156`	`155`	`operator=(val);`
`157`	`156`	`}`
`158`	`157`
`159`	`158`	`Vector16u3& operator=(const Vector16u3&) = default;`
`160`		`- Vector16u3& operator=(const hlsl::float32_t3& val)`
	`159`	`+ Vector16u3& operator=(const hlsl::uint32_t4& val)`
`161`	`160`	`{`
`162`	`161`	`x = val.x;`
`163`	`162`	`y = val.y;`
`164`	`163`	`z = val.z;`
`165`	`164`	`return *this;`
`166`	`165`	`}`
`167`	`166`
`168`		`- hlsl::float32_t3 getValue() const`
	`167`	`+ hlsl::uint32_t4 getValue() const`
`169`	`168`	`{`
`170`		`- return { x, y, z };`
	`169`	`+ return { x, y, z, 0 };`
`171`	`170`	`}`
`172`	`171`
`173`	`172`	`private:`
`@@ -182,24 +181,24 @@ class CDirQuantCacheBase`
`182`	`181`
`183`	`182`	`Vector16u4() : x(0u),y(0u),z(0u),w(0u) {}`
`184`	`183`	`Vector16u4(const Vector16u4&) = default;`
`185`		`- explicit Vector16u4(const hlsl::float32_t3& val)`
	`184`	`+ explicit Vector16u4(const hlsl::uint32_t4& val)`
`186`	`185`	`{`
`187`	`186`	`operator=(val);`
`188`	`187`	`}`
`189`	`188`
`190`	`189`	`Vector16u4& operator=(const Vector16u4&) = default;`
`191`		`- Vector16u4& operator=(const hlsl::float32_t3& val)`
	`190`	`+ Vector16u4& operator=(const hlsl::uint32_t4& val)`
`192`	`191`	`{`
`193`	`192`	`x = val.x;`
`194`	`193`	`y = val.y;`
`195`	`194`	`z = val.z;`
`196`		`- w = 0;`
	`195`	`+ w = val.w;`
`197`	`196`	`return *this;`
`198`	`197`	`}`
`199`	`198`
`200`		`- hlsl::float32_t3 getValue() const`
	`199`	`+ hlsl::uint32_t4 getValue() const`
`201`	`200`	`{`
`202`		`- return { x, y, z };`
	`201`	`+ return { x, y, z, w };`
`203`	`202`	`}`
`204`	`203`
`205`	`204`	`private:`
`@@ -379,11 +378,28 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::`
`379`	`378`	`std::tuple<cache_type_t<Formats>...> cache;`
`380`	`379`
`381`	`380`	`template<uint32_t dimensions, E_FORMAT CacheFormat>`
`382`		`- value_type_t<CacheFormat> quantize(const hlsl::float32_t3& value)`
	`381`	`+ value_type_t<CacheFormat> quantize(const hlsl::vector<hlsl::float32_t, dimensions>& value)`
`383`	`382`	`{`
`384`		`- const auto negativeMask = lessThan(value, hlsl::float32_t3(0.0f));`
`385`		`-`
`386`		`- const hlsl::float32_t3 absValue = abs(value);`
	`383`	`+ auto to_float32_t4 = [](hlsl::vector<hlsl::float32_t, dimensions> src) -> hlsl::float32_t4`
	`384`	`+ {`
	`385`	`+ if constexpr(dimensions == 1)`
	`386`	`+ {`
	`387`	`+ return {src.x, 0, 0, 0};`
	`388`	`+ } else if constexpr (dimensions == 2)`
	`389`	`+ {`
	`390`	`+ return {src.x, src.y, 0, 0};`
	`391`	`+ } else if constexpr (dimensions == 3)`
	`392`	`+ {`
	`393`	`+ return {src.x, src.y, src.z, 0};`
	`394`	`+ } else if constexpr (dimensions == 4)`
	`395`	`+ {`
	`396`	`+ return {src.x, src.y, src.z, src.w};`
	`397`	`+ }`
	`398`	`+ };`
	`399`	`+`
	`400`	`+ const auto negativeMask = to_float32_t4(lessThan(value, hlsl::vector<hlsl::float32_t, dimensions>(0.0f)));`
	`401`	`+`
	`402`	`+ const hlsl::vector<hlsl::float32_t, dimensions> absValue = abs(value);`
`387`	`403`	`const auto key = Key(absValue);`
`388`	`404`
`389`	`405`	`constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;`
`@@ -397,29 +413,42 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::`
`397`	`413`	`{`
`398`	`414`	`const auto fit = findBestFit<dimensions,quantizationBits>(absValue);`
`399`	`415`
`400`		`- quantized = abs(fit);`
	`416`	`+ const auto abs_fit = to_float32_t4(abs(fit));`
	`417`	`+ quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w);`
	`418`	`+`
`401`	`419`	`insertIntoCache<CacheFormat>(key,quantized);`
`402`	`420`	`}`
`403`	`421`	`}`
`404`	`422`
`405`		`- //return quantized.`
`406`		`- const auto negativeMulVec = hlsl::float32_t3(negativeMask.x ? -1 : 1, negativeMask.y ? -1 : 1, negativeMask.z ? -1 : 1);`
`407`		`- return value_type_t<CacheFormat>(negativeMulVec * quantized.getValue());`
	`423`	`+ auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)`
	`424`	`+ {`
	`425`	`+ hlsl::uint32_t4 retval;`
	`426`	`+ retval.x = mask.x ? val2.x : val1.x;`
	`427`	`+ retval.y = mask.y ? val2.y : val1.y;`
	`428`	`+ retval.z = mask.z ? val2.z : val1.z;`
	`429`	`+ retval.w = mask.w ? val2.w : val1.w;`
	`430`	`+ return retval;`
	`431`	`+ };`
	`432`	`+`
	`433`	`+ const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u);`
	`434`	`+ auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);`
	`435`	`+ restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask);`
	`436`	`+ return value_type_t<CacheFormat>(restoredAsVec & xorflag);`
`408`	`437`	`}`
`409`	`438`
`410`	`439`	`template<uint32_t dimensions, uint32_t quantizationBits>`
`411`		`- static inline hlsl::float32_t3 findBestFit(const hlsl::float32_t3& value)`
	`440`	`+ static inline hlsl::vector<hlsl::float32_t, dimensions> findBestFit(const hlsl::vector<hlsl::float32_t, dimensions>& value)`
`412`	`441`	`{`
`413`	`442`	`static_assert(dimensions>1u,"No point");`
`414`	`443`	`static_assert(dimensions<=4u,"High Dimensions are Hard!");`
`415`	`444`
`416`	`445`	`const auto vectorForDots = hlsl::normalize(value);`
`417`	`446`
`418`	`447`	`//`
`419`		`- hlsl::float32_t3 fittingVector;`
`420`		`- hlsl::float32_t3 floorOffset;`
	`448`	`+ hlsl::vector<hlsl::float32_t, dimensions> fittingVector;`
	`449`	`+ hlsl::vector<hlsl::float32_t, dimensions> floorOffset;`
`421`	`450`	`constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u;`
`422`		`- hlsl::float32_t3 corners[cornerCount] = {};`
	`451`	`+ hlsl::vector<hlsl::float32_t, dimensions> corners[cornerCount] = {};`
`423`	`452`	`{`
`424`	`453`	`uint32_t maxDirCompIndex = 0u;`
`425`	`454`	`for (auto i=1u; i<dimensions; i++)`
`@@ -431,7 +460,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::`
`431`	`460`	`if (maxDirectionComp < std::sqrtf(0.9998f / float(dimensions)))`
`432`	`461`	`{`
`433`	`462`	`_NBL_DEBUG_BREAK_IF(true);`
`434`		`- return hlsl::float32_t3(0.f);`
	`463`	`+ return hlsl::vector<hlsl::float32_t, dimensions>(0.f);`
`435`	`464`	`}`
`436`	`465`	`fittingVector = value / maxDirectionComp;`
`437`	`466`	`floorOffset[maxDirCompIndex] = 0.499f;`
`@@ -453,9 +482,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::`
`453`	`482`	`}`
`454`	`483`	`}`
`455`	`484`
`456`		`- hlsl::float32_t3 bestFit;`
	`485`	`+ hlsl::vector<hlsl::float32_t, dimensions> bestFit;`
`457`	`486`	`float closestTo1 = -1.f;`
`458`		`- auto evaluateFit = [&](const hlsl::float32_t3& newFit) -> void`
	`487`	`+ auto evaluateFit = [&](const hlsl::vector<hlsl::float32_t, dimensions>& newFit) -> void`
`459`	`488`	`{`
`460`	`489`	`auto newFitLen = length(newFit);`
`461`	`490`	`const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen);`
`@@ -467,7 +496,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::`
`467`	`496`	`};`
`468`	`497`
`469`	`498`	`constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u;`
`470`		`- const hlsl::float32_t3 cubeHalfSizeND = hlsl::float32_t3(cubeHalfSize);`
	`499`	`+ const hlsl::vector<hlsl::float32_t, dimensions> cubeHalfSizeND = hlsl::vector<hlsl::float32_t, dimensions>(cubeHalfSize);`
`471`	`500`	`for (uint32_t n=cubeHalfSize; n>0u; n--)`
`472`	`501`	`{`
`473`	`502`	`//we'd use float addition in the interest of speed, to increment the loop`