Skip to content

Commit 9994fda

Browse files
AnastaZIuk authored and Crisspl committed
Adjust to comments, add swizzles, normalizing, move division to the lambda
1 parent 2edba89 commit 9994fda

File tree

1 file changed

+120
-67
lines changed

1 file changed

+120
-67
lines changed

include/nbl/asset/filters/CNormalMapToDerivativeFilter.h

Lines changed: 120 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -11,28 +11,37 @@
1111
#include <functional>
1212

1313
#include "nbl/asset/filters/CMatchedSizeInOutImageFilterCommon.h"
14+
#include "nbl/asset/filters/CSwizzleAndConvertImageFilter.h"
1415
#include "CConvertFormatImageFilter.h"
1516

1617
namespace nbl
1718
{
1819
namespace asset
1920
{
2021

21-
class CNormalMapToDerivativeFilterBase
22+
template<typename Swizzle>
23+
class CNormalMapToDerivativeFilterBase : public impl::CSwizzleableAndDitherableFilterBase<false, false, Swizzle, IdentityDither>
2224
{
2325
public:
24-
class CNormalMapToDerivativeStateBase
26+
class CNormalMapToDerivativeStateBase : public impl::CSwizzleableAndDitherableFilterBase<false, false, Swizzle, IdentityDither>::state_type
2527
{
2628
public:
2729

28-
static inline constexpr size_t decodeTypeByteSize = sizeof(double);
29-
static inline constexpr size_t forcedScratchChannelAmount = 4;
30+
using decodeType = float;
31+
static inline constexpr size_t decodeTypeByteSize = sizeof(float);
32+
static inline constexpr size_t forcedScratchChannelAmount = 2;
3033
uint8_t* scratchMemory = nullptr; //!< memory covering all regions used for temporary filling within computation of sum values
3134
size_t scratchMemoryByteSize = {}; //!< required byte size for entire scratch memory
35+
bool normalizeImageByTotalABSValues = true; //!< force normalizing by maximum absolute values
3236

33-
static inline size_t getRequiredScratchByteSize(asset::VkExtent3D extent)
37+
/*
38+
layerCount - layer count used to execute the filter, not global layer count!
39+
extent - extent of input image at chosen mip map level
40+
*/
41+
42+
static inline size_t getRequiredScratchByteSize(size_t layerCount, asset::VkExtent3D extent)
3443
{
35-
size_t retval = extent.width * extent.height * extent.depth * decodeTypeByteSize * forcedScratchChannelAmount;
44+
size_t retval = extent.width * extent.height * extent.depth * decodeTypeByteSize * forcedScratchChannelAmount + (layerCount * decodeTypeByteSize * forcedScratchChannelAmount);
3645

3746
return retval;
3847
}
@@ -41,15 +50,28 @@ class CNormalMapToDerivativeFilterBase
4150
Layer ID is relative to outBaseLayer in state
4251
*/
4352

44-
const std::array<double, forcedScratchChannelAmount>& getAbsoluteLayerScaleValue(size_t layer)
53+
// Selects which of the per-layer scale factors getAbsoluteLayerScaleValue() returns.
enum E_SCALE_FACTOR
4554
{
46-
if (!maxAbsLayerScaleValues.empty())
47-
return maxAbsLayerScaleValues[layer];
55+
ESF_X, // scale factor of the first (X) scratch channel
56+
ESF_Y, // scale factor of the second (Y) scratch channel
57+
ESF_COUNT // one past the last selector, i.e. the number of scale factors
58+
};
59+
60+
const float getAbsoluteLayerScaleValue(size_t layer, E_SCALE_FACTOR scaleFactor)
61+
{
62+
if (!scaleValuesPointer)
63+
{
64+
auto offset = layer * forcedScratchChannelAmount + scaleFactor == ESF_X ? 0 : 1;
65+
return *(scaleValuesPointer + offset);
66+
}
4867
else
49-
return {};
68+
return 0; // or maybe assert?
5069
}
5170

5271
protected:
72+
73+
float* scaleValuesPointer = nullptr;
74+
5375
std::vector<std::array<double, forcedScratchChannelAmount>> maxAbsLayerScaleValues; //!< scales gained by the filter (each layer handled) for derivative map shader usage
5476
};
5577

@@ -62,6 +84,15 @@ class CNormalMapToDerivativeFilterBase
6284
if (!state)
6385
return false;
6486

87+
if (!state->scratchMemory)
88+
return false;
89+
90+
if (state->scratchMemoryByteSize == 0)
91+
return false;
92+
93+
if (!impl::CSwizzleableAndDitherableFilterBase<false, false, Swizzle, IdentityDither>::validate(state))
94+
return false;
95+
6596
return true;
6697
}
6798
};
@@ -71,22 +102,28 @@ class CNormalMapToDerivativeFilterBase
71102
72103
*/
73104

74-
class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon, public CNormalMapToDerivativeFilterBase
105+
template<typename Swizzle = DefaultSwizzle>
106+
class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon, public CNormalMapToDerivativeFilterBase<Swizzle>
75107
{
76108
public:
77109
virtual ~CNormalMapToDerivativeFilter() {}
78110

79-
class CStateBase : public CMatchedSizeInOutImageFilterCommon::state_type, public CNormalMapToDerivativeFilterBase::CNormalMapToDerivativeStateBase
111+
class CStateBase : public CMatchedSizeInOutImageFilterCommon::state_type, public CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase
80112
{
81113
public:
82114
CStateBase() = default;
83115
virtual ~CStateBase() = default;
84116

85117
private:
86118

119+
void setLayerScaleValuesOffset()
120+
{
121+
scaleValuesPointer = reinterpret_cast<float*>(scratchMemory) + (extent.width * extent.height * extent.depth * forcedScratchChannelAmount);
122+
}
123+
87124
void resetLayerScaleValues()
88125
{
89-
maxAbsLayerScaleValues.clear();
126+
memset(const_cast<float*>(scaleValuesPointer), 0, layerCount * forcedScratchChannelAmount * decodeTypeByteSize);
90127
}
91128

92129
friend class CNormalMapToDerivativeFilter;
@@ -98,21 +135,25 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
98135
if (!CMatchedSizeInOutImageFilterCommon::validate(state))
99136
return false;
100137

101-
if (!CNormalMapToDerivativeFilterBase::validate(state))
138+
if (!CNormalMapToDerivativeFilterBase<Swizzle>::validate(state))
102139
return false;
103140

104141
const ICPUImage::SCreationParams& inParams = state->inImage->getCreationParameters();
105142
const ICPUImage::SCreationParams& outParams = state->outImage->getCreationParameters();
106143
const auto inFormat = inParams.format;
107144
const auto outFormat = outParams.format;
108145

109-
if (outFormat != asset::EF_R8G8_SNORM)
146+
if (state->scratchMemoryByteSize < state_type::getRequiredScratchByteSize(state->layerCount, state->extent))
110147
return false;
111148

112-
if (state->scratchMemoryByteSize < state_type::getRequiredScratchByteSize(state->extent))
149+
if (asset::getFormatChannelCount(inFormat) < 3 && asset::getFormatChannelCount(outFormat) != 2)
113150
return false;
114151

115-
if (asset::getFormatChannelCount(inFormat) < 3 )
152+
if (asset::isIntegerFormat(inFormat) || asset::isIntegerFormat(outFormat))
153+
return false;
154+
155+
// TODO: remove this later when we can actually write/encode to block formats
156+
if (asset::isBlockCompressionFormat(outFormat))
116157
return false;
117158

118159
return true;
@@ -123,38 +164,27 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
123164
if (!validate(state))
124165
return false;
125166

167+
state->setLayerScaleValuesOffset();
126168
state->resetLayerScaleValues();
127169

128-
auto checkFormat = state->inImage->getCreationParameters().format;
129-
if (isIntegerFormat(checkFormat))
130-
return executeInterprated(state, reinterpret_cast<uint64_t*>(state->scratchMemory));
131-
else
132-
return executeInterprated(state, reinterpret_cast<double*>(state->scratchMemory));
133-
}
134-
135-
private:
136-
137-
template<typename decodeType> //!< double or uint64_t
138-
static inline bool executeInterprated(state_type* state, decodeType* scratchMemory)
139-
{
140170
const asset::E_FORMAT inFormat = state->inImage->getCreationParameters().format;
171+
const asset::E_FORMAT outFormat = state->outImage->getCreationParameters().format;
141172
const auto inTexelByteSize = asset::getTexelOrBlockBytesize(inFormat);
173+
const auto outTexelByteSize = asset::getTexelOrBlockBytesize(outFormat);
142174
const auto currentChannelCount = asset::getFormatChannelCount(inFormat);
143175
const auto arrayLayers = state->inImage->getCreationParameters().arrayLayers;
144176
static constexpr auto maxChannels = 4u;
145177

146178
#ifdef _NBL_DEBUG
147-
memset(scratchMemory, 0, state->scratchMemoryByteSize);
179+
memset(state->scratchMemory, 0, state->scratchMemoryByteSize);
148180
#endif // _NBL_DEBUG
149181

150-
const core::vector3du32_SIMD scratchByteStrides = TexelBlockInfo(asset::E_FORMAT::EF_R64G64B64A64_SFLOAT).convert3DTexelStridesTo1DByteStrides(state->extentLayerCount);
182+
const core::vector3du32_SIMD scratchByteStrides = TexelBlockInfo(asset::E_FORMAT::EF_R32G32_SFLOAT).convert3DTexelStridesTo1DByteStrides(state->extentLayerCount);
151183
const auto scratchTexelByteSize = scratchByteStrides[0];
152184

153-
// I wonder if we should let somebody pass through more than 1 layer, though I find it cool
154-
155185
const auto&& [copyInBaseLayer, copyOutBaseLayer, copyLayerCount] = std::make_tuple(state->inBaseLayer, state->outBaseLayer, state->layerCount);
156186
state->layerCount = 1u;
157-
187+
158188
auto resetState = [&]()
159189
{
160190
state->inBaseLayer = copyInBaseLayer;
@@ -164,27 +194,37 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
164194

165195
for (uint16_t w = 0u; w < copyLayerCount; ++w)
166196
{
167-
std::array<decodeType, maxChannels> maxAbsoluteDecodeValues = {};
197+
float* decodeAbsValuesOffset = state->scaleValuesPointer + (w * CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::forcedScratchChannelAmount);
168198

199+
auto& xMaxDecodeAbsValue = *decodeAbsValuesOffset;
200+
auto& yMaxDecodeAbsValue = *(decodeAbsValuesOffset + 1);
169201
{
170202
const uint8_t* inData = reinterpret_cast<const uint8_t*>(state->inImage->getBuffer()->getPointer());
171203
const auto blockDims = asset::getBlockDimensions(state->inImage->getCreationParameters().format);
172204
static constexpr uint8_t maxPlanes = 4;
173205

174-
auto decode = [&](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void
206+
auto decodeAndDivide = [&](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) -> void
175207
{
176208
core::vectorSIMDu32 localOutPos = readBlockPos * blockDims - core::vectorSIMDu32(state->inOffset.x, state->inOffset.y, state->inOffset.z);
177209

178210
auto* inDataAdress = inData + readBlockArrayOffset;
179211
const void* inSourcePixels[maxPlanes] = { inDataAdress, nullptr, nullptr, nullptr };
180212

181-
decodeType decodeBuffer[maxChannels] = {};
213+
double decodeBuffer[maxChannels] = {}; // ASCT TODO?
214+
double swizzledBuffer[maxChannels] = {}; // ASCT TODO?
215+
182216
for (auto blockY = 0u; blockY < blockDims.y; blockY++)
183217
for (auto blockX = 0u; blockX < blockDims.x; blockX++)
184218
{
185-
asset::decodePixelsRuntime(inFormat, inSourcePixels, decodeBuffer, blockX, blockY);
219+
impl::CSwizzleableAndDitherableFilterBase<false, false, Swizzle, IdentityDither>::onDecode(inFormat, state, inSourcePixels, decodeBuffer, swizzledBuffer, blockX, blockY);
220+
186221
const size_t offset = asset::IImage::SBufferCopy::getLocalByteOffset(core::vector3du32_SIMD(localOutPos.x + blockX, localOutPos.y + blockY, localOutPos.z), scratchByteStrides);
187-
memcpy(reinterpret_cast<uint8_t*>(scratchMemory) + offset, decodeBuffer, scratchTexelByteSize);
222+
float* data = reinterpret_cast<float*>(state->scratchMemory + offset);
223+
224+
auto& [xDecode, yDecode, zDecode] = std::make_tuple(*swizzledBuffer, *(swizzledBuffer + 1), *(swizzledBuffer + 2));
225+
226+
*data = -xDecode / zDecode;
227+
*(data + 1) = -yDecode / zDecode;
188228
}
189229
};
190230

@@ -193,49 +233,61 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
193233
CBasicImageFilterCommon::clip_region_functor_t clipFunctor(subresource, range, inFormat);
194234

195235
auto& inRegions = state->inImage->getRegions(state->inMipLevel);
196-
CBasicImageFilterCommon::executePerRegion(state->inImage, decode, inRegions.begin(), inRegions.end(), clipFunctor);
236+
CBasicImageFilterCommon::executePerRegion(state->inImage, decodeAndDivide, inRegions.begin(), inRegions.end(), clipFunctor);
197237
}
198238

199239
{
200-
auto getScratchPixel = [&](core::vector4di32_SIMD readBlockPos) -> decodeType*
240+
auto getScratchPixel = [&](core::vector4di32_SIMD readBlockPos) -> CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::decodeType*
201241
{
202-
const size_t scratchOffset = asset::IImage::SBufferCopy::getLocalByteOffset(core::vector3du32_SIMD(readBlockPos.x, readBlockPos.y, readBlockPos.z, 0), scratchByteStrides);
203-
return reinterpret_cast<decodeType*>(reinterpret_cast<uint8_t*>(scratchMemory) + scratchOffset);
242+
const size_t scratchOffset = asset::IImage::SBufferCopy::getLocalByteOffset(core::vector3du32_SIMD(readBlockPos.x, readBlockPos.y, readBlockPos.z, 0), scratchByteStrides); // TODO
243+
return reinterpret_cast<CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::decodeType*>(reinterpret_cast<uint8_t*>(state->scratchMemory) + scratchOffset);
204244
};
205245

206-
auto computeDerivativeTexel = [&](core::vectorSIMDi32 readBlockPos) -> void
246+
auto computeMaxAbs = [&](core::vectorSIMDi32 readBlockPos) -> void
207247
{
208-
decodeType* current = getScratchPixel(readBlockPos);
209-
auto& [x, y, z, a] = std::make_tuple(*current, *(current + 1), *(current + 2), *(current + 3));
248+
auto* current = getScratchPixel(readBlockPos);
249+
auto& [x, y] = std::make_tuple(*current, *(current + 1));
210250

211-
std::for_each(current, current + currentChannelCount,
212-
[&](const decodeType& itrValue)
213-
{
214-
uint8_t offset = &itrValue - current;
215-
const decodeType absoluteValue = core::abs(itrValue);
216-
217-
if (maxAbsoluteDecodeValues[offset] < absoluteValue)
218-
maxAbsoluteDecodeValues[offset] = absoluteValue;
219-
}
220-
);
251+
auto absoluteX = core::abs(x);
252+
auto absoluteY = core::abs(y);
221253

222-
x = -x / z;
223-
y = -y / z;
254+
if (xMaxDecodeAbsValue < absoluteX)
255+
xMaxDecodeAbsValue = absoluteX;
256+
257+
if (yMaxDecodeAbsValue < absoluteY)
258+
yMaxDecodeAbsValue = absoluteY;
224259
};
225260

226261
{
227262
core::vector3du32_SIMD localCoord;
228263
for (auto& z = localCoord[2] = 0u; z < state->extent.depth; ++z)
229264
for (auto& y = localCoord[1] = 0u; y < state->extent.height; ++y)
230265
for (auto& x = localCoord[0] = 0u; x < state->extent.width; ++x)
231-
computeDerivativeTexel(core::vectorSIMDu32(x, y, z));
266+
computeMaxAbs(core::vectorSIMDu32(x, y, z));
232267
}
233268

234-
auto& maxAbsLayerScaleValues = state->maxAbsLayerScaleValues.emplace_back();
235-
for (auto& absLayerScaleValue : maxAbsLayerScaleValues)
236-
absLayerScaleValue = maxAbsoluteDecodeValues[&absLayerScaleValue - &maxAbsLayerScaleValues[0]];
269+
auto normalizeScratch = [&](bool isSigned)
270+
{
271+
core::vector3du32_SIMD localCoord;
272+
for (auto& z = localCoord[2] = 0u; z < state->extent.depth; ++z)
273+
for (auto& y = localCoord[1] = 0u; y < state->extent.height; ++y)
274+
for (auto& x = localCoord[0] = 0u; x < state->extent.width; ++x)
275+
{
276+
const size_t scratchOffset = asset::IImage::SBufferCopy::getLocalByteOffset(localCoord, scratchByteStrides);
277+
auto* entryScratchAdress = reinterpret_cast<CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::decodeType*>(reinterpret_cast<uint8_t*>(state->scratchMemory) + scratchOffset);
278+
279+
if (isSigned)
280+
for (uint8_t channel = 0; channel < CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::forcedScratchChannelAmount; ++channel)
281+
entryScratchAdress[channel] = entryScratchAdress[channel] / decodeAbsValuesOffset[channel];
282+
else
283+
for (uint8_t channel = 0; channel < CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::forcedScratchChannelAmount; ++channel)
284+
entryScratchAdress[channel] = entryScratchAdress[channel] * 0.5f / decodeAbsValuesOffset[channel] + 0.5f;
285+
}
286+
};
237287

238-
// what about normalize, should it be done like SAT ?
288+
bool normalized = asset::isNormalizedFormat(outFormat);
289+
if (state->normalizeImageByTotalABSValues || normalized)
290+
normalizeScratch(asset::isSignedFormat(outFormat));
239291

240292
{
241293
uint8_t* outData = reinterpret_cast<uint8_t*>(state->outImage->getBuffer()->getPointer());
@@ -247,13 +299,14 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
247299
uint8_t* outDataAdress = outData + writeBlockArrayOffset;
248300

249301
const size_t offset = asset::IImage::SBufferCopy::getLocalByteOffset(localOutPos, scratchByteStrides);
250-
auto* data = reinterpret_cast<uint8_t*>(scratchMemory) + offset;
251-
asset::encodePixels<asset::EF_R8G8_SNORM, double>(outDataAdress, reinterpret_cast<double*>(data)); // overrrides texels, so region-overlapping case is fine
302+
auto* data = reinterpret_cast<uint8_t*>(state->scratchMemory) + offset;
303+
304+
impl::CSwizzleAndConvertImageFilterBase<false, false, Swizzle, IdentityDither>::onEncode(outFormat, state, outDataAdress, data, localOutPos, 0, 0, CNormalMapToDerivativeFilterBase<Swizzle>::CNormalMapToDerivativeStateBase::forcedScratchChannelAmount); // overrrides texels, so region-overlapping case is fine
252305
};
253306

254307
IImage::SSubresourceLayers subresource = { static_cast<IImage::E_ASPECT_FLAGS>(0u), state->outMipLevel, state->outBaseLayer, 1 };
255308
CMatchedSizeInOutImageFilterCommon::state_type::TexelRange range = { state->outOffset,state->extent };
256-
CBasicImageFilterCommon::clip_region_functor_t clipFunctor(subresource, range, asset::EF_R8G8_SNORM);
309+
CBasicImageFilterCommon::clip_region_functor_t clipFunctor(subresource, range, outFormat);
257310

258311
auto& outRegions = state->outImage->getRegions(state->outMipLevel);
259312
CBasicImageFilterCommon::executePerRegion(state->outImage, encode, outRegions.begin(), outRegions.end(), clipFunctor);
@@ -266,7 +319,7 @@ class CNormalMapToDerivativeFilter : public CMatchedSizeInOutImageFilterCommon,
266319

267320
resetState();
268321
return true;
269-
}
322+
}
270323
};
271324

272325
} // end namespace asset

0 commit comments

Comments
 (0)