11
11
#include < functional>
12
12
13
13
#include " nbl/asset/filters/CMatchedSizeInOutImageFilterCommon.h"
14
+ #include " nbl/asset/filters/CSwizzleAndConvertImageFilter.h"
14
15
#include " CConvertFormatImageFilter.h"
15
16
16
17
namespace nbl
17
18
{
18
19
namespace asset
19
20
{
20
21
21
- class CNormalMapToDerivativeFilterBase
22
+ template <typename Swizzle>
23
+ class CNormalMapToDerivativeFilterBase : public impl ::CSwizzleableAndDitherableFilterBase<false , false , Swizzle, IdentityDither>
22
24
{
23
25
public:
24
- class CNormalMapToDerivativeStateBase
26
+ class CNormalMapToDerivativeStateBase : public impl ::CSwizzleableAndDitherableFilterBase< false , false , Swizzle, IdentityDither>::state_type
25
27
{
26
28
public:
27
29
28
- static inline constexpr size_t decodeTypeByteSize = sizeof (double );
29
- static inline constexpr size_t forcedScratchChannelAmount = 4 ;
30
+ using decodeType = float ;
31
+ static inline constexpr size_t decodeTypeByteSize = sizeof (float );
32
+ static inline constexpr size_t forcedScratchChannelAmount = 2 ;
30
33
uint8_t * scratchMemory = nullptr ; // !< memory covering all regions used for temporary filling within computation of sum values
31
34
size_t scratchMemoryByteSize = {}; // !< required byte size for entire scratch memory
35
+ bool normalizeImageByTotalABSValues = true ; // !< force normalizing by maximum absolute values
32
36
33
- static inline size_t getRequiredScratchByteSize (asset::VkExtent3D extent)
37
+ /*
38
+ layerCount - layer count used to execute the filter, not global layer count!
39
+ extent - extent of input image at chosen mip map level
40
+ */
41
+
42
+ static inline size_t getRequiredScratchByteSize (size_t layerCount, asset::VkExtent3D extent)
34
43
{
35
- size_t retval = extent.width * extent.height * extent.depth * decodeTypeByteSize * forcedScratchChannelAmount;
44
+ size_t retval = extent.width * extent.height * extent.depth * decodeTypeByteSize * forcedScratchChannelAmount + (layerCount * decodeTypeByteSize * forcedScratchChannelAmount) ;
36
45
37
46
return retval;
38
47
}
@@ -41,15 +50,28 @@ class CNormalMapToDerivativeFilterBase
41
50
Layer ID is relative to outBaseLayer in state
42
51
*/
43
52
44
- const std::array< double , forcedScratchChannelAmount>& getAbsoluteLayerScaleValue ( size_t layer)
53
+ enum E_SCALE_FACTOR
45
54
{
46
- if (!maxAbsLayerScaleValues.empty ())
47
- return maxAbsLayerScaleValues[layer];
55
+ ESF_X,
56
+ ESF_Y,
57
+ ESF_COUNT
58
+ };
59
+
60
+ const float getAbsoluteLayerScaleValue (size_t layer, E_SCALE_FACTOR scaleFactor)
61
+ {
62
+ if (!scaleValuesPointer)
63
+ {
64
+ auto offset = layer * forcedScratchChannelAmount + scaleFactor == ESF_X ? 0 : 1 ;
65
+ return *(scaleValuesPointer + offset);
66
+ }
48
67
else
49
- return {};
68
+ return 0 ; // or maybe assert?
50
69
}
51
70
52
71
protected:
72
+
73
+ float * scaleValuesPointer = nullptr ;
74
+
53
75
std::vector<std::array<double , forcedScratchChannelAmount>> maxAbsLayerScaleValues; // !< scales gained by the filter (each layer handled) for derivative map shader usage
54
76
};
55
77
@@ -62,6 +84,15 @@ class CNormalMapToDerivativeFilterBase
62
84
if (!state)
63
85
return false ;
64
86
87
+ if (!state->scratchMemory )
88
+ return false ;
89
+
90
+ if (state->scratchMemoryByteSize == 0 )
91
+ return false ;
92
+
93
+ if (!impl::CSwizzleableAndDitherableFilterBase<false , false , Swizzle, IdentityDither>::validate (state))
94
+ return false ;
95
+
65
96
return true ;
66
97
}
67
98
};
@@ -71,22 +102,28 @@ class CNormalMapToDerivativeFilterBase
71
102
72
103
*/
73
104
74
- class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon , public CNormalMapToDerivativeFilterBase
105
+ template <typename Swizzle = DefaultSwizzle>
106
+ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon , public CNormalMapToDerivativeFilterBase <Swizzle>
75
107
{
76
108
public:
77
109
virtual ~CNormalMapToDerivativeFilter () {}
78
110
79
- class CStateBase : public CMatchedSizeInOutImageFilterCommon ::state_type, public CNormalMapToDerivativeFilterBase::CNormalMapToDerivativeStateBase
111
+ class CStateBase : public CMatchedSizeInOutImageFilterCommon ::state_type, public CNormalMapToDerivativeFilterBase<Swizzle> ::CNormalMapToDerivativeStateBase
80
112
{
81
113
public:
82
114
CStateBase () = default ;
83
115
virtual ~CStateBase () = default ;
84
116
85
117
private:
86
118
119
+ void setLayerScaleValuesOffset ()
120
+ {
121
+ scaleValuesPointer = reinterpret_cast <float *>(scratchMemory) + (extent.width * extent.height * extent.depth * forcedScratchChannelAmount);
122
+ }
123
+
87
124
void resetLayerScaleValues ()
88
125
{
89
- maxAbsLayerScaleValues. clear ( );
126
+ memset ( const_cast < float *>(scaleValuesPointer), 0 , layerCount * forcedScratchChannelAmount * decodeTypeByteSize );
90
127
}
91
128
92
129
friend class CNormalMapToDerivativeFilter ;
@@ -98,21 +135,25 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
98
135
if (!CMatchedSizeInOutImageFilterCommon::validate (state))
99
136
return false ;
100
137
101
- if (!CNormalMapToDerivativeFilterBase::validate (state))
138
+ if (!CNormalMapToDerivativeFilterBase<Swizzle> ::validate (state))
102
139
return false ;
103
140
104
141
const ICPUImage::SCreationParams& inParams = state->inImage ->getCreationParameters ();
105
142
const ICPUImage::SCreationParams& outParams = state->outImage ->getCreationParameters ();
106
143
const auto inFormat = inParams.format ;
107
144
const auto outFormat = outParams.format ;
108
145
109
- if (outFormat != asset::EF_R8G8_SNORM )
146
+ if (state-> scratchMemoryByteSize < state_type::getRequiredScratchByteSize (state-> layerCount , state-> extent ) )
110
147
return false ;
111
148
112
- if (state-> scratchMemoryByteSize < state_type::getRequiredScratchByteSize (state-> extent ) )
149
+ if (asset::getFormatChannelCount (inFormat) < 3 && asset::getFormatChannelCount (outFormat) != 2 )
113
150
return false ;
114
151
115
- if (asset::getFormatChannelCount (inFormat) < 3 )
152
+ if (asset::isIntegerFormat (inFormat) || asset::isIntegerFormat (outFormat))
153
+ return false ;
154
+
155
+ // TODO: remove this later when we can actually write/encode to block formats
156
+ if (asset::isBlockCompressionFormat (outFormat))
116
157
return false ;
117
158
118
159
return true ;
@@ -123,38 +164,27 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
123
164
if (!validate (state))
124
165
return false ;
125
166
167
+ state->setLayerScaleValuesOffset ();
126
168
state->resetLayerScaleValues ();
127
169
128
- auto checkFormat = state->inImage ->getCreationParameters ().format ;
129
- if (isIntegerFormat (checkFormat))
130
- return executeInterprated (state, reinterpret_cast <uint64_t *>(state->scratchMemory ));
131
- else
132
- return executeInterprated (state, reinterpret_cast <double *>(state->scratchMemory ));
133
- }
134
-
135
- private:
136
-
137
- template <typename decodeType> // !< double or uint64_t
138
- static inline bool executeInterprated (state_type* state, decodeType* scratchMemory)
139
- {
140
170
const asset::E_FORMAT inFormat = state->inImage ->getCreationParameters ().format ;
171
+ const asset::E_FORMAT outFormat = state->outImage ->getCreationParameters ().format ;
141
172
const auto inTexelByteSize = asset::getTexelOrBlockBytesize (inFormat);
173
+ const auto outTexelByteSize = asset::getTexelOrBlockBytesize (outFormat);
142
174
const auto currentChannelCount = asset::getFormatChannelCount (inFormat);
143
175
const auto arrayLayers = state->inImage ->getCreationParameters ().arrayLayers ;
144
176
static constexpr auto maxChannels = 4u ;
145
177
146
178
#ifdef _NBL_DEBUG
147
- memset (scratchMemory, 0 , state->scratchMemoryByteSize );
179
+ memset (state-> scratchMemory , 0 , state->scratchMemoryByteSize );
148
180
#endif // _NBL_DEBUG
149
181
150
- const core::vector3du32_SIMD scratchByteStrides = TexelBlockInfo (asset::E_FORMAT::EF_R64G64B64A64_SFLOAT ).convert3DTexelStridesTo1DByteStrides (state->extentLayerCount );
182
+ const core::vector3du32_SIMD scratchByteStrides = TexelBlockInfo (asset::E_FORMAT::EF_R32G32_SFLOAT ).convert3DTexelStridesTo1DByteStrides (state->extentLayerCount );
151
183
const auto scratchTexelByteSize = scratchByteStrides[0 ];
152
184
153
- // I wonder if we should let somebody pass through more than 1 layer, though I find it cool
154
-
155
185
const auto && [copyInBaseLayer, copyOutBaseLayer, copyLayerCount] = std::make_tuple (state->inBaseLayer , state->outBaseLayer , state->layerCount );
156
186
state->layerCount = 1u ;
157
-
187
+
158
188
auto resetState = [&]()
159
189
{
160
190
state->inBaseLayer = copyInBaseLayer;
@@ -164,27 +194,37 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
164
194
165
195
for (uint16_t w = 0u ; w < copyLayerCount; ++w)
166
196
{
167
- std::array<decodeType, maxChannels> maxAbsoluteDecodeValues = {} ;
197
+ float * decodeAbsValuesOffset = state-> scaleValuesPointer + (w * CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::forcedScratchChannelAmount) ;
168
198
199
+ auto & xMaxDecodeAbsValue = *decodeAbsValuesOffset;
200
+ auto & yMaxDecodeAbsValue = *(decodeAbsValuesOffset + 1 );
169
201
{
170
202
const uint8_t * inData = reinterpret_cast <const uint8_t *>(state->inImage ->getBuffer ()->getPointer ());
171
203
const auto blockDims = asset::getBlockDimensions (state->inImage ->getCreationParameters ().format );
172
204
static constexpr uint8_t maxPlanes = 4 ;
173
205
174
- auto decode = [&](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void
206
+ auto decodeAndDivide = [&](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void
175
207
{
176
208
core::vectorSIMDu32 localOutPos = readBlockPos * blockDims - core::vectorSIMDu32 (state->inOffset .x , state->inOffset .y , state->inOffset .z );
177
209
178
210
auto * inDataAdress = inData + readBlockArrayOffset;
179
211
const void * inSourcePixels[maxPlanes] = { inDataAdress, nullptr , nullptr , nullptr };
180
212
181
- decodeType decodeBuffer[maxChannels] = {};
213
+ double decodeBuffer[maxChannels] = {}; // ASCT TODO?
214
+ double swizzledBuffer[maxChannels] = {}; // ASCT TODO?
215
+
182
216
for (auto blockY = 0u ; blockY < blockDims.y ; blockY++)
183
217
for (auto blockX = 0u ; blockX < blockDims.x ; blockX++)
184
218
{
185
- asset::decodePixelsRuntime (inFormat, inSourcePixels, decodeBuffer, blockX, blockY);
219
+ impl::CSwizzleableAndDitherableFilterBase<false , false , Swizzle, IdentityDither>::onDecode (inFormat, state, inSourcePixels, decodeBuffer, swizzledBuffer, blockX, blockY);
220
+
186
221
const size_t offset = asset::IImage::SBufferCopy::getLocalByteOffset (core::vector3du32_SIMD (localOutPos.x + blockX, localOutPos.y + blockY, localOutPos.z ), scratchByteStrides);
187
- memcpy (reinterpret_cast <uint8_t *>(scratchMemory) + offset, decodeBuffer, scratchTexelByteSize);
222
+ float * data = reinterpret_cast <float *>(state->scratchMemory + offset);
223
+
224
+ auto & [xDecode, yDecode, zDecode] = std::make_tuple (*swizzledBuffer, *(swizzledBuffer + 1 ), *(swizzledBuffer + 2 ));
225
+
226
+ *data = -xDecode / zDecode;
227
+ *(data + 1 ) = -yDecode / zDecode;
188
228
}
189
229
};
190
230
@@ -193,49 +233,61 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
193
233
CBasicImageFilterCommon::clip_region_functor_t clipFunctor (subresource, range, inFormat);
194
234
195
235
auto & inRegions = state->inImage ->getRegions (state->inMipLevel );
196
- CBasicImageFilterCommon::executePerRegion (state->inImage , decode , inRegions.begin (), inRegions.end (), clipFunctor);
236
+ CBasicImageFilterCommon::executePerRegion (state->inImage , decodeAndDivide , inRegions.begin (), inRegions.end (), clipFunctor);
197
237
}
198
238
199
239
{
200
- auto getScratchPixel = [&](core::vector4di32_SIMD readBlockPos) -> decodeType*
240
+ auto getScratchPixel = [&](core::vector4di32_SIMD readBlockPos) -> CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase:: decodeType*
201
241
{
202
- const size_t scratchOffset = asset::IImage::SBufferCopy::getLocalByteOffset (core::vector3du32_SIMD (readBlockPos.x , readBlockPos.y , readBlockPos.z , 0 ), scratchByteStrides);
203
- return reinterpret_cast <decodeType*>(reinterpret_cast <uint8_t *>(scratchMemory) + scratchOffset);
242
+ const size_t scratchOffset = asset::IImage::SBufferCopy::getLocalByteOffset (core::vector3du32_SIMD (readBlockPos.x , readBlockPos.y , readBlockPos.z , 0 ), scratchByteStrides); // TODO
243
+ return reinterpret_cast <CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase:: decodeType*>(reinterpret_cast <uint8_t *>(state-> scratchMemory ) + scratchOffset);
204
244
};
205
245
206
- auto computeDerivativeTexel = [&](core::vectorSIMDi32 readBlockPos) -> void
246
+ auto computeMaxAbs = [&](core::vectorSIMDi32 readBlockPos) -> void
207
247
{
208
- decodeType * current = getScratchPixel (readBlockPos);
209
- auto & [x, y, z, a ] = std::make_tuple (*current, *(current + 1 ), *(current + 2 ), *(current + 3 ));
248
+ auto * current = getScratchPixel (readBlockPos);
249
+ auto & [x, y] = std::make_tuple (*current, *(current + 1 ));
210
250
211
- std::for_each (current, current + currentChannelCount,
212
- [&](const decodeType& itrValue)
213
- {
214
- uint8_t offset = &itrValue - current;
215
- const decodeType absoluteValue = core::abs (itrValue);
216
-
217
- if (maxAbsoluteDecodeValues[offset] < absoluteValue)
218
- maxAbsoluteDecodeValues[offset] = absoluteValue;
219
- }
220
- );
251
+ auto absoluteX = core::abs (x);
252
+ auto absoluteY = core::abs (y);
221
253
222
- x = -x / z;
223
- y = -y / z;
254
+ if (xMaxDecodeAbsValue < absoluteX)
255
+ xMaxDecodeAbsValue = absoluteX;
256
+
257
+ if (yMaxDecodeAbsValue < absoluteY)
258
+ yMaxDecodeAbsValue = absoluteY;
224
259
};
225
260
226
261
{
227
262
core::vector3du32_SIMD localCoord;
228
263
for (auto & z = localCoord[2 ] = 0u ; z < state->extent .depth ; ++z)
229
264
for (auto & y = localCoord[1 ] = 0u ; y < state->extent .height ; ++y)
230
265
for (auto & x = localCoord[0 ] = 0u ; x < state->extent .width ; ++x)
231
- computeDerivativeTexel (core::vectorSIMDu32 (x, y, z));
266
+ computeMaxAbs (core::vectorSIMDu32 (x, y, z));
232
267
}
233
268
234
- auto & maxAbsLayerScaleValues = state->maxAbsLayerScaleValues .emplace_back ();
235
- for (auto & absLayerScaleValue : maxAbsLayerScaleValues)
236
- absLayerScaleValue = maxAbsoluteDecodeValues[&absLayerScaleValue - &maxAbsLayerScaleValues[0 ]];
269
+ auto normalizeScratch = [&](bool isSigned)
270
+ {
271
+ core::vector3du32_SIMD localCoord;
272
+ for (auto & z = localCoord[2 ] = 0u ; z < state->extent .depth ; ++z)
273
+ for (auto & y = localCoord[1 ] = 0u ; y < state->extent .height ; ++y)
274
+ for (auto & x = localCoord[0 ] = 0u ; x < state->extent .width ; ++x)
275
+ {
276
+ const size_t scratchOffset = asset::IImage::SBufferCopy::getLocalByteOffset (localCoord, scratchByteStrides);
277
+ auto * entryScratchAdress = reinterpret_cast <CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::decodeType*>(reinterpret_cast <uint8_t *>(state->scratchMemory ) + scratchOffset);
278
+
279
+ if (isSigned)
280
+ for (uint8_t channel = 0 ; channel < CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::forcedScratchChannelAmount; ++channel)
281
+ entryScratchAdress[channel] = entryScratchAdress[channel] / decodeAbsValuesOffset[channel];
282
+ else
283
+ for (uint8_t channel = 0 ; channel < CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::forcedScratchChannelAmount; ++channel)
284
+ entryScratchAdress[channel] = entryScratchAdress[channel] * 0 .5f / decodeAbsValuesOffset[channel] + 0 .5f ;
285
+ }
286
+ };
237
287
238
- // what about normalize, should it be done like SAT ?
288
+ bool normalized = asset::isNormalizedFormat (outFormat);
289
+ if (state->normalizeImageByTotalABSValues || normalized)
290
+ normalizeScratch (asset::isSignedFormat (outFormat));
239
291
240
292
{
241
293
uint8_t * outData = reinterpret_cast <uint8_t *>(state->outImage ->getBuffer ()->getPointer ());
@@ -247,13 +299,14 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
247
299
uint8_t * outDataAdress = outData + writeBlockArrayOffset;
248
300
249
301
const size_t offset = asset::IImage::SBufferCopy::getLocalByteOffset (localOutPos, scratchByteStrides);
250
- auto * data = reinterpret_cast <uint8_t *>(scratchMemory) + offset;
251
- asset::encodePixels<asset::EF_R8G8_SNORM, double >(outDataAdress, reinterpret_cast <double *>(data)); // overrrides texels, so region-overlapping case is fine
302
+ auto * data = reinterpret_cast <uint8_t *>(state->scratchMemory ) + offset;
303
+
304
+ impl::CSwizzleAndConvertImageFilterBase<false , false , Swizzle, IdentityDither>::onEncode (outFormat, state, outDataAdress, data, localOutPos, 0 , 0 , CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::forcedScratchChannelAmount); // overrrides texels, so region-overlapping case is fine
252
305
};
253
306
254
307
IImage::SSubresourceLayers subresource = { static_cast <IImage::E_ASPECT_FLAGS>(0u ), state->outMipLevel , state->outBaseLayer , 1 };
255
308
CMatchedSizeInOutImageFilterCommon::state_type::TexelRange range = { state->outOffset ,state->extent };
256
- CBasicImageFilterCommon::clip_region_functor_t clipFunctor (subresource, range, asset::EF_R8G8_SNORM );
309
+ CBasicImageFilterCommon::clip_region_functor_t clipFunctor (subresource, range, outFormat );
257
310
258
311
auto & outRegions = state->outImage ->getRegions (state->outMipLevel );
259
312
CBasicImageFilterCommon::executePerRegion (state->outImage , encode, outRegions.begin (), outRegions.end (), clipFunctor);
@@ -266,7 +319,7 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
266
319
267
320
resetState ();
268
321
return true ;
269
- }
322
+ }
270
323
};
271
324
272
325
} // end namespace asset
0 commit comments