1
+ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
2
+ // This file is part of the "Nabla Engine".
3
+ // For conditions of distribution and use, see copyright notice in nabla.h
4
+
5
+ #ifndef __NBL_ASSET_C_NORMAL_MAP_TO_DERIVATIVE_FILTER_H_INCLUDED__
6
+ #define __NBL_ASSET_C_NORMAL_MAP_TO_DERIVATIVE_FILTER_H_INCLUDED__
7
+
8
+ #include " nbl/core/core.h"
9
+
10
+ #include < type_traits>
11
+ #include < functional>
12
+
13
+ #include " nbl/asset/filters/CMatchedSizeInOutImageFilterCommon.h"
14
+ #include " CConvertFormatImageFilter.h"
15
+
16
+ namespace nbl
17
+ {
18
+ namespace asset
19
+ {
20
+
21
+ class CNormalMapToDerivativeFilterBase
22
+ {
23
+ public:
24
+ class CNormalMapToDerivativeStateBase
25
+ {
26
+ public:
27
+
28
+ static inline constexpr size_t decodeTypeByteSize = sizeof (double );
29
+ static inline constexpr size_t forcedScratchChannelAmount = 4 ;
30
+ uint8_t * scratchMemory = nullptr ; // !< memory covering all regions used for temporary filling within computation of sum values
31
+ size_t scratchMemoryByteSize = {}; // !< required byte size for entire scratch memory
32
+
33
+ static inline size_t getRequiredScratchByteSize (asset::VkExtent3D extent)
34
+ {
35
+ size_t retval = extent.width * extent.height * extent.depth * decodeTypeByteSize * forcedScratchChannelAmount;
36
+
37
+ return retval;
38
+ }
39
+
40
+ /*
41
+ Layer ID is relative to outBaseLayer in state
42
+ */
43
+
44
+ const std::array<double , forcedScratchChannelAmount>& getAbsoluteLayerScaleValue (size_t layer)
45
+ {
46
+ if (!maxAbsLayerScaleValues.empty ())
47
+ return maxAbsLayerScaleValues[layer];
48
+ else
49
+ return {};
50
+ }
51
+
52
+ protected:
53
+ std::vector<std::array<double , forcedScratchChannelAmount>> maxAbsLayerScaleValues; // !< scales gained by the filter (each layer handled) for derivative map shader usage
54
+ };
55
+
56
+ protected:
57
+ CNormalMapToDerivativeFilterBase () {}
58
+ virtual ~CNormalMapToDerivativeFilterBase () {}
59
+
60
+ static inline bool validate (CNormalMapToDerivativeStateBase* state)
61
+ {
62
+ if (!state)
63
+ return false ;
64
+
65
+ return true ;
66
+ }
67
+ };
68
+
69
+ // ! Convert Normal Map to Derivative Normal Map
70
+ /*
71
+
72
+ */
73
+
74
+ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon , public CNormalMapToDerivativeFilterBase
75
+ {
76
+ public:
77
+ virtual ~CNormalMapToDerivativeFilter () {}
78
+
79
+ class CStateBase : public CMatchedSizeInOutImageFilterCommon ::state_type, public CNormalMapToDerivativeFilterBase::CNormalMapToDerivativeStateBase
80
+ {
81
+ public:
82
+ CStateBase () = default ;
83
+ virtual ~CStateBase () = default ;
84
+
85
+ private:
86
+
87
+ void resetLayerScaleValues ()
88
+ {
89
+ maxAbsLayerScaleValues.clear ();
90
+ }
91
+
92
+ friend class CNormalMapToDerivativeFilter ;
93
+ };
94
+ using state_type = CStateBase; // !< full combined state
95
+
96
+ static inline bool validate (state_type* state)
97
+ {
98
+ if (!CMatchedSizeInOutImageFilterCommon::validate (state))
99
+ return false ;
100
+
101
+ if (!CNormalMapToDerivativeFilterBase::validate (state))
102
+ return false ;
103
+
104
+ const ICPUImage::SCreationParams& inParams = state->inImage ->getCreationParameters ();
105
+ const ICPUImage::SCreationParams& outParams = state->outImage ->getCreationParameters ();
106
+ const auto inFormat = inParams.format ;
107
+ const auto outFormat = outParams.format ;
108
+
109
+ if (outFormat != asset::EF_R8G8_SNORM)
110
+ return false ;
111
+
112
+ if (state->scratchMemoryByteSize < state_type::getRequiredScratchByteSize (state->extent ))
113
+ return false ;
114
+
115
+ if (asset::getFormatChannelCount (inFormat) < 3 )
116
+ return false ;
117
+
118
+ return true ;
119
+ }
120
+
121
+ static inline bool execute (state_type* state)
122
+ {
123
+ if (!validate (state))
124
+ return false ;
125
+
126
+ state->resetLayerScaleValues ();
127
+
128
+ auto checkFormat = state->inImage ->getCreationParameters ().format ;
129
+ if (isIntegerFormat (checkFormat))
130
+ return executeInterprated (state, reinterpret_cast <uint64_t *>(state->scratchMemory ));
131
+ else
132
+ return executeInterprated (state, reinterpret_cast <double *>(state->scratchMemory ));
133
+ }
134
+
135
+ private:
136
+
137
+ template <typename decodeType> // !< double or uint64_t
138
+ static inline bool executeInterprated (state_type* state, decodeType* scratchMemory)
139
+ {
140
+ const asset::E_FORMAT inFormat = state->inImage ->getCreationParameters ().format ;
141
+ const auto inTexelByteSize = asset::getTexelOrBlockBytesize (inFormat);
142
+ const auto currentChannelCount = asset::getFormatChannelCount (inFormat);
143
+ const auto arrayLayers = state->inImage ->getCreationParameters ().arrayLayers ;
144
+ static constexpr auto maxChannels = 4u ;
145
+
146
+ #ifdef _NBL_DEBUG
147
+ memset (scratchMemory, 0 , state->scratchMemoryByteSize );
148
+ #endif // _NBL_DEBUG
149
+
150
+ const core::vector3du32_SIMD scratchByteStrides = TexelBlockInfo (asset::E_FORMAT::EF_R64G64B64A64_SFLOAT).convert3DTexelStridesTo1DByteStrides (state->extentLayerCount );
151
+ const auto scratchTexelByteSize = scratchByteStrides[0 ];
152
+
153
+ // I wonder if we should let somebody pass through more than 1 layer, though I find it cool
154
+
155
+ const auto && [copyInBaseLayer, copyOutBaseLayer, copyLayerCount] = std::make_tuple (state->inBaseLayer , state->outBaseLayer , state->layerCount );
156
+ state->layerCount = 1u ;
157
+
158
+ auto resetState = [&]()
159
+ {
160
+ state->inBaseLayer = copyInBaseLayer;
161
+ state->outBaseLayer = copyOutBaseLayer;
162
+ state->layerCount = copyLayerCount;
163
+ };
164
+
165
+ for (uint16_t w = 0u ; w < copyLayerCount; ++w)
166
+ {
167
+ std::array<decodeType, maxChannels> maxAbsoluteDecodeValues = {};
168
+
169
+ {
170
+ const uint8_t * inData = reinterpret_cast <const uint8_t *>(state->inImage ->getBuffer ()->getPointer ());
171
+ const auto blockDims = asset::getBlockDimensions (state->inImage ->getCreationParameters ().format );
172
+ static constexpr uint8_t maxPlanes = 4 ;
173
+
174
+ auto decode = [&](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void
175
+ {
176
+ core::vectorSIMDu32 localOutPos = readBlockPos * blockDims - core::vectorSIMDu32 (state->inOffset .x , state->inOffset .y , state->inOffset .z );
177
+
178
+ auto * inDataAdress = inData + readBlockArrayOffset;
179
+ const void * inSourcePixels[maxPlanes] = { inDataAdress, nullptr , nullptr , nullptr };
180
+
181
+ decodeType decodeBuffer[maxChannels] = {};
182
+ for (auto blockY = 0u ; blockY < blockDims.y ; blockY++)
183
+ for (auto blockX = 0u ; blockX < blockDims.x ; blockX++)
184
+ {
185
+ asset::decodePixelsRuntime (inFormat, inSourcePixels, decodeBuffer, blockX, blockY);
186
+ const size_t offset = asset::IImage::SBufferCopy::getLocalByteOffset (core::vector3du32_SIMD (localOutPos.x + blockX, localOutPos.y + blockY, localOutPos.z ), scratchByteStrides);
187
+ memcpy (reinterpret_cast <uint8_t *>(scratchMemory) + offset, decodeBuffer, scratchTexelByteSize);
188
+ }
189
+ };
190
+
191
+ IImage::SSubresourceLayers subresource = { static_cast <IImage::E_ASPECT_FLAGS>(0u ), state->inMipLevel , state->inBaseLayer , 1 };
192
+ CMatchedSizeInOutImageFilterCommon::state_type::TexelRange range = { state->inOffset ,state->extent };
193
+ CBasicImageFilterCommon::clip_region_functor_t clipFunctor (subresource, range, inFormat);
194
+
195
+ auto & inRegions = state->inImage ->getRegions (state->inMipLevel );
196
+ CBasicImageFilterCommon::executePerRegion (state->inImage , decode, inRegions.begin (), inRegions.end (), clipFunctor);
197
+ }
198
+
199
+ {
200
+ auto getScratchPixel = [&](core::vector4di32_SIMD readBlockPos) -> decodeType*
201
+ {
202
+ const size_t scratchOffset = asset::IImage::SBufferCopy::getLocalByteOffset (core::vector3du32_SIMD (readBlockPos.x , readBlockPos.y , readBlockPos.z , 0 ), scratchByteStrides);
203
+ return reinterpret_cast <decodeType*>(reinterpret_cast <uint8_t *>(scratchMemory) + scratchOffset);
204
+ };
205
+
206
+ auto computeDerivativeTexel = [&](core::vectorSIMDi32 readBlockPos) -> void
207
+ {
208
+ decodeType* current = getScratchPixel (readBlockPos);
209
+ auto & [x, y, z, a] = std::make_tuple (*current, *(current + 1 ), *(current + 2 ), *(current + 3 ));
210
+
211
+ std::for_each (current, current + currentChannelCount,
212
+ [&](const decodeType& itrValue)
213
+ {
214
+ uint8_t offset = &itrValue - current;
215
+ const decodeType absoluteValue = core::abs (itrValue);
216
+
217
+ if (maxAbsoluteDecodeValues[offset] < absoluteValue)
218
+ maxAbsoluteDecodeValues[offset] = absoluteValue;
219
+ }
220
+ );
221
+
222
+ x = -x / z;
223
+ y = -y / z;
224
+ };
225
+
226
+ {
227
+ core::vector3du32_SIMD localCoord;
228
+ for (auto & z = localCoord[2 ] = 0u ; z < state->extent .depth ; ++z)
229
+ for (auto & y = localCoord[1 ] = 0u ; y < state->extent .height ; ++y)
230
+ for (auto & x = localCoord[0 ] = 0u ; x < state->extent .width ; ++x)
231
+ computeDerivativeTexel (core::vectorSIMDu32 (x, y, z));
232
+ }
233
+
234
+ auto & maxAbsLayerScaleValues = state->maxAbsLayerScaleValues .emplace_back ();
235
+ for (auto & absLayerScaleValue : maxAbsLayerScaleValues)
236
+ absLayerScaleValue = maxAbsoluteDecodeValues[&absLayerScaleValue - &maxAbsLayerScaleValues[0 ]];
237
+
238
+ // what about normalize, should it be done like SAT ?
239
+
240
+ {
241
+ uint8_t * outData = reinterpret_cast <uint8_t *>(state->outImage ->getBuffer ()->getPointer ());
242
+
243
+ auto encode = [&](uint32_t writeBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void
244
+ {
245
+ // encoding format cannot be block compressed so in this case block==texel
246
+ auto localOutPos = readBlockPos - core::vectorSIMDu32 (state->outOffset .x , state->outOffset .y , state->outOffset .z , readBlockPos.w ); // force 0 on .w compoment to obtain valid offset
247
+ uint8_t * outDataAdress = outData + writeBlockArrayOffset;
248
+
249
+ const size_t offset = asset::IImage::SBufferCopy::getLocalByteOffset (localOutPos, scratchByteStrides);
250
+ auto * data = reinterpret_cast <uint8_t *>(scratchMemory) + offset;
251
+ asset::encodePixels<asset::EF_R8G8_SNORM, double >(outDataAdress, reinterpret_cast <double *>(data)); // overrrides texels, so region-overlapping case is fine
252
+ };
253
+
254
+ IImage::SSubresourceLayers subresource = { static_cast <IImage::E_ASPECT_FLAGS>(0u ), state->outMipLevel , state->outBaseLayer , 1 };
255
+ CMatchedSizeInOutImageFilterCommon::state_type::TexelRange range = { state->outOffset ,state->extent };
256
+ CBasicImageFilterCommon::clip_region_functor_t clipFunctor (subresource, range, asset::EF_R8G8_SNORM);
257
+
258
+ auto & outRegions = state->outImage ->getRegions (state->outMipLevel );
259
+ CBasicImageFilterCommon::executePerRegion (state->outImage , encode, outRegions.begin (), outRegions.end (), clipFunctor);
260
+ }
261
+ }
262
+
263
+ ++state->inBaseLayer ;
264
+ ++state->outBaseLayer ;
265
+ }
266
+
267
+ resetState ();
268
+ return true ;
269
+ }
270
+ };
271
+
272
+ } // end namespace asset
273
+ } // end namespace nbl
274
+
275
+ #endif // __NBL_ASSET_C_NORMAL_MAP_TO_DERIVATIVE_FILTER_H_INCLUDED__
0 commit comments