@@ -43,23 +43,23 @@ class CDirQuantCacheBase
43
43
44
44
Vector8u3 () : x (0u ),y (0u ),z (0u ) {}
45
45
Vector8u3 (const Vector8u3&) = default ;
46
- explicit Vector8u3 (const hlsl::float32_t3 & val)
46
+ explicit Vector8u3 (const hlsl::uint32_t4 & val)
47
47
{
48
48
operator =(val);
49
49
}
50
50
51
51
// Copy assignment is member-wise.
Vector8u3& operator=(const Vector8u3&) = default;
// Stores the x, y and z components of `val`; `val.w` is not read.
// Each 32-bit component is converted to the member's own type, so values that
// do not fit are truncated (presumably to 8 bits, per the type name - confirm
// against the member declarations).
Vector8u3& operator=(const hlsl::uint32_t4& val)
{
    x = static_cast<decltype(x)>(val.x);
    y = static_cast<decltype(y)>(val.y);
    z = static_cast<decltype(z)>(val.z);
    return *this;
}
59
59
60
- hlsl::float32_t3 getValue () const
60
+ hlsl::uint32_t4 getValue () const
61
61
{
62
- return { x, y, z };
62
+ return { x, y, z, 0 };
63
63
}
64
64
65
65
@@ -75,24 +75,24 @@ class CDirQuantCacheBase
75
75
76
76
Vector8u4 () : x (0u ),y (0u ),z (0u ),w (0u ) {}
77
77
Vector8u4 (const Vector8u4&) = default ;
78
- explicit Vector8u4 (const hlsl::float32_t3 & val)
78
+ explicit Vector8u4 (const hlsl::uint32_t4 & val)
79
79
{
80
80
operator =(val);
81
81
}
82
82
83
83
// Copy assignment is member-wise.
Vector8u4& operator=(const Vector8u4&) = default;
// Stores all four components of `val`, including w.
// Each 32-bit component is converted to the member's own type, so values that
// do not fit are truncated (presumably to 8 bits, per the type name - confirm
// against the member declarations).
Vector8u4& operator=(const hlsl::uint32_t4& val)
{
    x = static_cast<decltype(x)>(val.x);
    y = static_cast<decltype(y)>(val.y);
    z = static_cast<decltype(z)>(val.z);
    w = static_cast<decltype(w)>(val.w);
    return *this;
}
92
92
93
- hlsl::float32_t3 getValue () const
93
+ hlsl::uint32_t4 getValue () const
94
94
{
95
- return { x, y, z };
95
+ return { x, y, z, w };
96
96
}
97
97
98
98
private:
@@ -109,17 +109,16 @@ class CDirQuantCacheBase
109
109
110
110
Vector1010102 () : storage (0u ) {}
111
111
Vector1010102 (const Vector1010102&) = default ;
112
- explicit Vector1010102 (const hlsl::float32_t3 & val)
112
+ explicit Vector1010102 (const hlsl::uint32_t4 & val)
113
113
{
114
114
operator =(val);
115
115
}
116
116
117
117
// Copy assignment is member-wise.
Vector1010102& operator=(const Vector1010102&) = default;
// Packs x, y and z of `val` into `storage`, each in its own field of
// (quantizationBits + 1) bits: x in the lowest field, then y, then z.
// `val.w` is not read.
// NOTE(review): components are OR-ed in without masking, so a component wider
// than its field spills into the neighbouring field - callers are expected to
// pass values that are already in range.
Vector1010102& operator=(const hlsl::uint32_t4& val)
{
    constexpr auto bitsPerComponent = quantizationBits + 1u;
    constexpr auto zShift = bitsPerComponent * 2u;

    const auto xField = val.x;
    const auto yField = val.y << bitsPerComponent;
    const auto zField = val.z << zShift;

    storage = xField | yField | zField;
    return *this;
}
125
124
@@ -132,11 +131,11 @@ class CDirQuantCacheBase
132
131
return storage==other.storage ;
133
132
}
134
133
135
- hlsl::float32_t3 getValue () const
134
+ hlsl::uint32_t4 getValue () const
136
135
{
137
136
constexpr auto storageBits = quantizationBits + 1u ;
138
137
const auto mask = (0x1u << storageBits) - 1u ;
139
- return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2 )) & mask};
138
+ return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2 )) & mask, 0 };
140
139
}
141
140
142
141
private:
@@ -151,23 +150,23 @@ class CDirQuantCacheBase
151
150
152
151
Vector16u3 () : x (0u ),y (0u ),z (0u ) {}
153
152
Vector16u3 (const Vector16u3&) = default ;
154
- explicit Vector16u3 (const hlsl::float32_t3 & val)
153
+ explicit Vector16u3 (const hlsl::uint32_t4 & val)
155
154
{
156
155
operator =(val);
157
156
}
158
157
159
158
// Copy assignment is member-wise.
Vector16u3& operator=(const Vector16u3&) = default;
// Stores the x, y and z components of `val`; `val.w` is not read.
// Each 32-bit component is converted to the member's own type, so values that
// do not fit are truncated (presumably to 16 bits, per the type name - confirm
// against the member declarations).
Vector16u3& operator=(const hlsl::uint32_t4& val)
{
    x = static_cast<decltype(x)>(val.x);
    y = static_cast<decltype(y)>(val.y);
    z = static_cast<decltype(z)>(val.z);
    return *this;
}
167
166
168
- hlsl::float32_t3 getValue () const
167
+ hlsl::uint32_t4 getValue () const
169
168
{
170
- return { x, y, z };
169
+ return { x, y, z, 0 };
171
170
}
172
171
173
172
private:
@@ -182,24 +181,24 @@ class CDirQuantCacheBase
182
181
183
182
Vector16u4 () : x (0u ),y (0u ),z (0u ),w (0u ) {}
184
183
Vector16u4 (const Vector16u4&) = default ;
185
- explicit Vector16u4 (const hlsl::float32_t3 & val)
184
+ explicit Vector16u4 (const hlsl::uint32_t4 & val)
186
185
{
187
186
operator =(val);
188
187
}
189
188
190
189
// Copy assignment is member-wise.
Vector16u4& operator=(const Vector16u4&) = default;
// Stores all four components of `val`, including w.
// Each 32-bit component is converted to the member's own type, so values that
// do not fit are truncated (presumably to 16 bits, per the type name - confirm
// against the member declarations).
Vector16u4& operator=(const hlsl::uint32_t4& val)
{
    x = static_cast<decltype(x)>(val.x);
    y = static_cast<decltype(y)>(val.y);
    z = static_cast<decltype(z)>(val.z);
    w = static_cast<decltype(w)>(val.w);
    return *this;
}
199
198
200
- hlsl::float32_t3 getValue () const
199
+ hlsl::float32_t4 getValue () const
201
200
{
202
- return { x, y, z };
201
+ return { x, y, z, w };
203
202
}
204
203
205
204
private:
@@ -379,11 +378,28 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
379
378
std::tuple<cache_type_t <Formats>...> cache;
380
379
381
380
template <uint32_t dimensions, E_FORMAT CacheFormat>
382
- value_type_t <CacheFormat> quantize (const hlsl::float32_t3 & value)
381
+ value_type_t <CacheFormat> quantize (const hlsl::vector<hlsl:: float32_t , dimensions> & value)
383
382
{
384
- const auto negativeMask = lessThan (value, hlsl::float32_t3 (0 .0f ));
385
-
386
- const hlsl::float32_t3 absValue = abs (value);
383
+ auto to_float32_t4 = [](hlsl::vector<hlsl::float32_t , dimensions> src) -> hlsl::float32_t4
384
+ {
385
+ if constexpr (dimensions == 1 )
386
+ {
387
+ return {src.x , 0 , 0 , 0 };
388
+ } else if constexpr (dimensions == 2 )
389
+ {
390
+ return {src.x , src.y , 0 , 0 };
391
+ } else if constexpr (dimensions == 3 )
392
+ {
393
+ return {src.x , src.y , src.z , 0 };
394
+ } else if constexpr (dimensions == 4 )
395
+ {
396
+ return {src.x , src.y , src.z , src.w };
397
+ }
398
+ };
399
+
400
+ const auto negativeMask = to_float32_t4 (lessThan (value, hlsl::vector<hlsl::float32_t , dimensions>(0 .0f )));
401
+
402
+ const hlsl::vector<hlsl::float32_t , dimensions> absValue = abs (value);
387
403
const auto key = Key (absValue);
388
404
389
405
constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -397,29 +413,42 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
397
413
{
398
414
const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
399
415
400
- quantized = abs (fit);
416
+ const auto abs_fit = to_float32_t4 (abs (fit));
417
+ quantized = hlsl::uint32_t4 (abs_fit.x , abs_fit.y , abs_fit.z , abs_fit.w );
418
+
401
419
insertIntoCache<CacheFormat>(key,quantized);
402
420
}
403
421
}
404
422
405
- // return quantized.
406
- const auto negativeMulVec = hlsl::float32_t3 (negativeMask.x ? -1 : 1 , negativeMask.y ? -1 : 1 , negativeMask.z ? -1 : 1 );
407
- return value_type_t <CacheFormat>(negativeMulVec * quantized.getValue ());
423
+ auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
424
+ {
425
+ hlsl::uint32_t4 retval;
426
+ retval.x = mask.x ? val2.x : val1.x ;
427
+ retval.y = mask.y ? val2.y : val1.y ;
428
+ retval.z = mask.z ? val2.z : val1.z ;
429
+ retval.w = mask.w ? val2.w : val1.w ;
430
+ return retval;
431
+ };
432
+
433
+ const hlsl::uint32_t4 xorflag ((0x1u << (quantizationBits + 1u )) - 1u );
434
+ auto restoredAsVec = quantized.getValue () ^ switch_vec (hlsl::uint32_t4 (0u ), hlsl::uint32_t4 (xorflag), negativeMask);
435
+ restoredAsVec += switch_vec (hlsl::uint32_t4 (0u ), hlsl::uint32_t4 (1u ), negativeMask);
436
+ return value_type_t <CacheFormat>(restoredAsVec & xorflag);
408
437
}
409
438
410
439
template <uint32_t dimensions, uint32_t quantizationBits>
411
- static inline hlsl::float32_t3 findBestFit (const hlsl::float32_t3 & value)
440
+ static inline hlsl::vector<hlsl:: float32_t , dimensions> findBestFit (const hlsl::vector<hlsl:: float32_t , dimensions> & value)
412
441
{
413
442
static_assert (dimensions>1u ," No point" );
414
443
static_assert (dimensions<=4u ," High Dimensions are Hard!" );
415
444
416
445
const auto vectorForDots = hlsl::normalize (value);
417
446
418
447
//
419
- hlsl::float32_t3 fittingVector;
420
- hlsl::float32_t3 floorOffset;
448
+ hlsl::vector<hlsl:: float32_t , dimensions> fittingVector;
449
+ hlsl::vector<hlsl:: float32_t , dimensions> floorOffset;
421
450
constexpr uint32_t cornerCount = (0x1u <<(dimensions-1u ))-1u ;
422
- hlsl::float32_t3 corners[cornerCount] = {};
451
+ hlsl::vector<hlsl:: float32_t , dimensions> corners[cornerCount] = {};
423
452
{
424
453
uint32_t maxDirCompIndex = 0u ;
425
454
for (auto i=1u ; i<dimensions; i++)
@@ -431,7 +460,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
431
460
if (maxDirectionComp < std::sqrtf (0 .9998f / float (dimensions)))
432
461
{
433
462
_NBL_DEBUG_BREAK_IF (true );
434
- return hlsl::float32_t3 (0 .f );
463
+ return hlsl::vector<hlsl:: float32_t , dimensions> (0 .f );
435
464
}
436
465
fittingVector = value / maxDirectionComp;
437
466
floorOffset[maxDirCompIndex] = 0 .499f ;
@@ -453,9 +482,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
453
482
}
454
483
}
455
484
456
- hlsl::float32_t3 bestFit;
485
+ hlsl::vector<hlsl:: float32_t , dimensions> bestFit;
457
486
float closestTo1 = -1 .f ;
458
- auto evaluateFit = [&](const hlsl::float32_t3 & newFit) -> void
487
+ auto evaluateFit = [&](const hlsl::vector<hlsl:: float32_t , dimensions> & newFit) -> void
459
488
{
460
489
auto newFitLen = length (newFit);
461
490
const float dp = hlsl::dot (newFit,vectorForDots) / (newFitLen);
@@ -467,7 +496,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
467
496
};
468
497
469
498
constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u ;
470
- const hlsl::float32_t3 cubeHalfSizeND = hlsl::float32_t3 (cubeHalfSize);
499
+ const hlsl::vector<hlsl:: float32_t , dimensions> cubeHalfSizeND = hlsl::vector<hlsl:: float32_t , dimensions> (cubeHalfSize);
471
500
for (uint32_t n=cubeHalfSize; n>0u ; n--)
472
501
{
473
502
// we'd use float addition in the interest of speed, to increment the loop
0 commit comments