Skip to content

Commit 29eb064

Browse files
committed
Fixed VT to work with textures smaller than page size (all mips being miptail)
But they need to be upscaled to at least half page size; a function is provided for this
1 parent 0b17afd commit 29eb064

File tree

6 files changed

+129
-23
lines changed

6 files changed

+129
-23
lines changed

examples_tests/22.RaytracedAO/Renderer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "nbl/ext/ScreenShot/ScreenShot.h"
66

77
#include "../source/Nabla/COpenCLHandler.h"
8+
#include "COpenGLDriver.h"
89

910

1011
#ifndef _NBL_BUILD_OPTIX_

include/nbl/asset/material_compiler/CMaterialCompilerGLSLBackendCommon.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ class CMaterialCompilerGLSLBackendCommon
632632

633633
bool commit(const commit_t& cm)
634634
{
635-
auto texture = asset::ICPUVirtualTexture::createPoTPaddedSquareImageWithMipLevels(cm.image.get(), cm.uwrap, cm.vwrap, cm.border).first;
635+
auto texture = vt->createPoTPaddedSquareImageWithMipLevels(cm.image.get(), cm.uwrap, cm.vwrap, cm.border).first;
636636
return vt->commit(cm.addr, texture.get(), cm.subresource, cm.uwrap, cm.vwrap, cm.border);
637637
}
638638
//! @returns if all commits succeeded

include/nbl/asset/utils/ICPUVirtualTexture.h

Lines changed: 106 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,79 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
8686
}
8787
};
8888

89-
static auto createPoTPaddedSquareImageWithMipLevels(const ICPUImage* _img, ISampler::E_TEXTURE_CLAMP _wrapu, ISampler::E_TEXTURE_CLAMP _wrapv, ISampler::E_TEXTURE_BORDER_COLOR _borderColor)
89+
//! Returns an image suitable for alloc(): when both width and height of `_img` are below half the page size, a new single-mip image is created and `_img` is blitted into it, scaled by (half page size / smaller dimension)
90+
//! Otherwise returns `_img` itself; returns nullptr when `_img` is null or when the blit fails
91+
//! Always call this before alloc()
92+
core::smart_refctd_ptr<asset::ICPUImage> createUpscaledImage(const ICPUImage* _img)
9093
{
94+
if (!_img)
95+
return nullptr;
96+
97+
const auto& params = _img->getCreationParameters();
98+
const uint32_t halfPage = m_pgSzxy / 2u; // NOTE(review): m_pgSzxy is presumably the page side length in texels - confirm
99+
100+
if (params.extent.width >= halfPage || params.extent.height >= halfPage) // at least one dimension already reaches half a page: no upscale needed
101+
{
102+
asset::ICPUImage* img = const_cast<asset::ICPUImage*>(_img); // const is dropped only so the input can be wrapped in a smart pointer
103+
return core::smart_refctd_ptr<asset::ICPUImage>(img);
104+
}
105+
106+
const uint32_t min_extent = std::min(params.extent.width, params.extent.height);
107+
const float upscale_factor = static_cast<float>(halfPage) / static_cast<float>(min_extent); // one factor for both axes, derived from the smaller dimension
108+
109+
asset::VkExtent3D extent_upscaled;
110+
extent_upscaled.depth = 1u;
111+
extent_upscaled.width = static_cast<uint32_t>(params.extent.width * upscale_factor + 0.5f); // +0.5f before truncation rounds to nearest
112+
extent_upscaled.height = static_cast<uint32_t>(params.extent.height * upscale_factor + 0.5f);
113+
114+
asset::ICPUImage::SCreationParams new_params = params; // start from the input's creation parameters, then override extent and mip count
115+
new_params.extent = extent_upscaled;
116+
new_params.mipLevels = 1u;
117+
118+
auto upscaled_img = asset::ICPUImage::create(std::move(new_params));
119+
const size_t bufsz = upscaled_img->getImageDataSizeInBytes();
120+
auto buf = core::make_smart_refctd_ptr<asset::ICPUBuffer>(bufsz);
121+
auto regions = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<asset::IImage::SBufferCopy>>(1u); // a single region: mip 0, layer 0, whole upscaled extent
122+
auto& region = regions->operator[](0u);
123+
region.bufferOffset = 0u;
124+
region.bufferRowLength = extent_upscaled.width;
125+
region.bufferImageHeight = 0u;
126+
region.imageOffset = { 0,0,0 };
127+
region.imageExtent = extent_upscaled;
128+
region.imageSubresource.baseArrayLayer = 0u;
129+
region.imageSubresource.layerCount = 1u;
130+
region.imageSubresource.mipLevel = 0u;
131+
region.imageSubresource.aspectMask = _img->getRegion(0u, core::vectorSIMDu32(0u, 0u, 0u, 0u))->imageSubresource.aspectMask; // aspect mask is copied from the input's mip 0 region at the origin; the getRegion() result is dereferenced without a null check
132+
133+
upscaled_img->setBufferAndRegions(std::move(buf), std::move(regions));
134+
135+
using blit_filter_t = asset::CBlitImageFilter<false, false, asset::VoidSwizzle, asset::IdentityDither, asset::CMitchellImageFilterKernel<>>; // resampling is done with the Mitchell kernel
136+
blit_filter_t::state_type blit;
137+
blit.inOffsetBaseLayer = core::vectorSIMDu32(0u, 0u, 0u, 0u);
138+
blit.inExtent = params.extent;
139+
blit.inLayerCount = 1u;
140+
blit.outOffsetBaseLayer = core::vectorSIMDu32(0u, 0u, 0u, 0u);
141+
blit.outExtent = extent_upscaled;
142+
blit.outLayerCount = 1u;
143+
blit.inImage = const_cast<asset::ICPUImage*>(_img);
144+
blit.outImage = upscaled_img.get();
145+
blit.scratchMemoryByteSize = blit_filter_t::getRequiredScratchByteSize(&blit);
146+
blit.scratchMemory = reinterpret_cast<uint8_t*>(_NBL_ALIGNED_MALLOC(blit.scratchMemoryByteSize, _NBL_SIMD_ALIGNMENT)); // scratch is allocated here and freed right after execute(); the allocation result is not checked
147+
148+
const bool blit_succeeded = blit_filter_t::execute(&blit);
149+
_NBL_ALIGNED_FREE(blit.scratchMemory); // freed before the success check, so the failure path does not skip it
150+
if (!blit_succeeded)
151+
return nullptr;
152+
153+
return upscaled_img;
154+
}
155+
156+
//! Always call this before commit()
157+
static std::pair<core::smart_refctd_ptr<asset::ICPUImage>, asset::VkExtent3D> createPoTPaddedSquareImageWithMipLevels(const ICPUImage* _img, ISampler::E_TEXTURE_CLAMP _wrapu, ISampler::E_TEXTURE_CLAMP _wrapv, ISampler::E_TEXTURE_BORDER_COLOR _borderColor)
158+
{
159+
if (!_img)
160+
return { nullptr, asset::VkExtent3D{0u,0u,0u} };
161+
91162
const auto& params = _img->getCreationParameters();
92163
const auto originalExtent = params.extent;
93164
const uint32_t paddedExtent = core::roundUpToPoT(std::max<uint32_t>(params.extent.width,params.extent.height));
@@ -222,6 +293,19 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
222293
uint32_t miptailPgAddr = SPhysPgOffset::invalid_addr;
223294

224295
using phys_pg_addr_alctr_t = ICPUVTResidentStorage::phys_pg_addr_alctr_t;
296+
297+
if (levelsTakingAtLeastOnePageCount < _subres.levelCount)
298+
{
299+
uint32_t miptailPgAddr_tmp = phys_pg_addr_alctr_t::invalid_address;
300+
const uint32_t szAndAlignment = 1u;
301+
core::address_allocator_traits<phys_pg_addr_alctr_t>::multi_alloc_addr(storage->tileAlctr, 1u, &miptailPgAddr_tmp, &szAndAlignment, &szAndAlignment, nullptr);
302+
miptailPgAddr_tmp = (miptailPgAddr_tmp == phys_pg_addr_alctr_t::invalid_address) ? SPhysPgOffset::invalid_addr : storage->encodePageAddress(miptailPgAddr_tmp);
303+
304+
miptailPgAddr = miptailPgAddr_tmp;
305+
}
306+
307+
const bool wholeTexGoesToMiptailPage = (levelsTakingAtLeastOnePageCount == 0u);
308+
225309
//TODO up to this line, it's kinda common code for CPU and GPU, refactor later
226310

227311
// TODO: parallelize over all 3 for loops
@@ -234,7 +318,7 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
234318
for (uint32_t x = 0u; x < w; ++x)
235319
{
236320
uint32_t physPgAddr = phys_pg_addr_alctr_t::invalid_address;
237-
if (i>=levelsTakingAtLeastOnePageCount)
321+
if (i>=levelsTakingAtLeastOnePageCount) // this `if` always executes in case of whole texture going into miptail page
238322
physPgAddr = miptailPgAddr;
239323
else
240324
{
@@ -249,24 +333,22 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
249333
if (i==(levelsTakingAtLeastOnePageCount-1u) && levelsTakingAtLeastOnePageCount<_subres.levelCount)
250334
{
251335
assert(w==1u && h==1u);
252-
uint32_t miptailPgAddr_tmp = phys_pg_addr_alctr_t::invalid_address;
253-
const uint32_t szAndAlignment = 1u;
254-
core::address_allocator_traits<phys_pg_addr_alctr_t>::multi_alloc_addr(storage->tileAlctr, 1u, &miptailPgAddr_tmp, &szAndAlignment, &szAndAlignment, nullptr);
255-
miptailPgAddr_tmp = (miptailPgAddr_tmp==phys_pg_addr_alctr_t::invalid_address) ? SPhysPgOffset::invalid_addr : storage->encodePageAddress(miptailPgAddr_tmp);
256-
257-
physPgAddr |= (miptailPgAddr_tmp<<SPhysPgOffset::PAGE_ADDR_BITLENGTH);
258-
259-
miptailPgAddr = miptailPgAddr_tmp;
336+
337+
physPgAddr |= (miptailPgAddr<<SPhysPgOffset::PAGE_ADDR_BITLENGTH);
260338
}
261-
else
339+
else // this `else` always executes in case of whole texture going into miptail page
262340
physPgAddr |= (SPhysPgOffset::invalid_addr<<SPhysPgOffset::PAGE_ADDR_BITLENGTH);
341+
263342
if (i < levelsTakingAtLeastOnePageCount)
264343
{
344+
// physical double-address to write into page table
345+
const uint32_t physAddrToWrite = physPgAddr;
346+
265347
const auto texelPos = core::vectorSIMDu32(pgtOffset.x>>i, pgtOffset.y>>i, 0u, pgtOffset.z) + core::vectorSIMDu32(x, y, 0u, 0u);
266348
const auto* region = m_pageTable->getRegion(i, texelPos);
267349
const uint64_t byteoffset = region->getByteOffset(texelPos, region->getByteStrides(m_pageTable->getTexelBlockInfo()));
268350
uint8_t* bufptr = reinterpret_cast<uint8_t*>(m_pageTable->getBuffer()->getPointer()) + byteoffset;
269-
reinterpret_cast<uint32_t*>(bufptr)[0] = physPgAddr;
351+
reinterpret_cast<uint32_t*>(bufptr)[0] = physAddrToWrite;
270352
}
271353

272354
if (!SPhysPgOffset(physPgAddr).valid())
@@ -319,6 +401,18 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
319401
}
320402
}
321403

404+
if (wholeTexGoesToMiptailPage)
405+
{
406+
// physical double-address to write into page table
407+
uint32_t physAddrToWrite = SPhysPgOffset::invalid_addr | (miptailPgAddr << SPhysPgOffset::PAGE_ADDR_BITLENGTH);
408+
409+
const auto texelPos = core::vectorSIMDu32(pgtOffset.x, pgtOffset.y, 0u, pgtOffset.z);
410+
const auto* region = m_pageTable->getRegion(0u, texelPos);
411+
const uint64_t byteoffset = region->getByteOffset(texelPos, region->getByteStrides(m_pageTable->getTexelBlockInfo()));
412+
uint8_t* bufptr = reinterpret_cast<uint8_t*>(m_pageTable->getBuffer()->getPointer()) + byteoffset;
413+
reinterpret_cast<uint32_t*>(bufptr)[0] = physAddrToWrite;
414+
}
415+
322416
return true;
323417
}
324418

include/nbl/asset/utils/IVirtualTexture.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa
139139
uint64_t pgTab_x : 8;
140140
uint64_t pgTab_y : 8;
141141
uint64_t pgTab_layer : 8;
142-
uint64_t maxMip : 4;
142+
uint64_t maxMip : 4; // value 0x0fu means, the texture takes only 1 page of physical storage (miptail)
143143
uint64_t wrap_x : 2;
144144
uint64_t wrap_y : 2;
145145

@@ -210,7 +210,9 @@ class IVirtualTexture : public core::IReferenceCounted, public IVirtualTextureBa
210210
texData.pgTab_y = _offset.y;
211211
texData.pgTab_layer = _offset.z;
212212

213-
texData.maxMip = _mipCount-1u-m_pgSzxy_log2;
213+
const uint32_t maxMip = _mipCount - 1u - m_pgSzxy_log2;
214+
assert(static_cast<int32_t>(maxMip) >= -1); // only textures of size at least half page size must be packed
215+
texData.maxMip = maxMip;
214216

215217
texData.wrap_x = SMasterTextureData::ETC_to_EWM(_wrapu);
216218
texData.wrap_y = SMasterTextureData::ETC_to_EWM(_wrapv);

include/nbl/builtin/glsl/virtual_texturing/impl_functions.glsl

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,10 @@ uvec2 nbl_glsl_unpackWrapModes(in uvec2 texData)
2626
{
2727
return (texData >> uvec2(28u, 30u))& uvec2(0x03u);
2828
}
29-
uint nbl_glsl_unpackMaxMipInVT(in uvec2 texData)
29+
int nbl_glsl_unpackMaxMipInVT(in uvec2 texData)
3030
{
31-
return (texData.y >> 24) & 0x0fu;
31+
uint mm = (texData.y >> 24) & 0x0fu;
32+
return mm == 0x0fu ? -1 : int(mm);
3233
}
3334
vec3 nbl_glsl_unpackVirtualUV(in uvec2 texData)
3435
{
@@ -118,13 +119,15 @@ vec4 nbl_glsl_vTextureGrad_impl(in uint formatID, in vec3 virtualUV, in mat2 dOr
118119
// are we performing minification
119120
bool positiveLoD = LoD>0.0;
120121
// magnification samples LoD 0, else clip to max representable in VT
121-
int clippedLoD = positiveLoD ? min(LoD_high,originalMaxFullMip):0;
122+
int clippedLoD = positiveLoD ? min(LoD_high,originalMaxFullMip):0; // originalMaxFullMip is always -1 in case of no miplevel taking at least 1 full page
123+
clippedLoD = originalMaxFullMip < 0 ? 0 : clippedLoD;
122124

123125
// if minification is being performed then get tail position
124126
int levelInTail = LoD_high-clippedLoD;
125127
// have to do trilinear only if doing minification AND larger than 1x1 footprint
126128
bool haveToDoTrilinear = levelInTail<int(_NBL_VT_IMPL_PAGE_SZ_LOG2) && positiveLoD;
127129
levelInTail = haveToDoTrilinear ? levelInTail:(positiveLoD ? int(_NBL_VT_IMPL_PAGE_SZ_LOG2):0);
130+
levelInTail += (originalMaxFullMip < 0) ? 1 : 0;
128131

129132
// get the higher resolution mip-map level
130133
vec3 hiPhysCoord = nbl_glsl_vTexture_helper(formatID,virtualUV,clippedLoD,levelInTail);
@@ -133,9 +136,10 @@ vec4 nbl_glsl_vTextureGrad_impl(in uint formatID, in vec3 virtualUV, in mat2 dOr
133136
// speculative if (haveToDoTrilinear)
134137
{
135138
// now we have absolute guarantees that both LoD_high and LoD_low are in the valid original mip range
136-
bool highNotInLastFull = LoD_high<originalMaxFullMip;
139+
bool highNotInLastFull = originalMaxFullMip>=0 && LoD_high<originalMaxFullMip;
137140
clippedLoD = highNotInLastFull ? (clippedLoD+1):clippedLoD;
138141
levelInTail = highNotInLastFull ? levelInTail:(levelInTail+1);
142+
levelInTail = min(levelInTail, int(_NBL_VT_IMPL_PAGE_SZ_LOG2));
139143
loPhysCoord = nbl_glsl_vTexture_helper(formatID,virtualUV,clippedLoD,levelInTail);
140144
}
141145

@@ -184,7 +188,7 @@ vec4 nbl_glsl_vTextureGrad(in uvec2 _texData, in vec2 uv, in mat2 dUV)
184188
virtualUV.xy += uv*originalSz;
185189
virtualUV.xy *= nbl_glsl_VT_getVTexSzRcp();
186190

187-
return nbl_glsl_vTextureGrad_impl(formatID, virtualUV, dUV, int(nbl_glsl_unpackMaxMipInVT(_texData)));
191+
return nbl_glsl_vTextureGrad_impl(formatID, virtualUV, dUV, nbl_glsl_unpackMaxMipInVT(_texData));
188192
}
189193
#endif //_NBL_VT_FLOAT_VIEWS_COUNT
190194

@@ -226,7 +230,7 @@ ivec4 nbl_glsl_iVTextureLod(in uvec2 _texData, in vec2 uv, in uint lod)
226230
virtualUV.xy += uv * originalSz;
227231
virtualUV.xy *= nbl_glsl_VT_getVTexSzRcp();
228232

229-
return nbl_glsl_iVTextureLod_impl(formatID, virtualUV, lod, int(nbl_glsl_unpackMaxMipInVT(_texData)));
233+
return nbl_glsl_iVTextureLod_impl(formatID, virtualUV, lod, nbl_glsl_unpackMaxMipInVT(_texData));
230234
}
231235
#endif //_NBL_VT_INT_VIEWS_COUNT
232236

@@ -268,7 +272,7 @@ uvec4 nbl_glsl_uVTextureLod(in uvec2 _texData, in vec2 uv, in uint lod)
268272
virtualUV.xy += uv * originalSz;
269273
virtualUV.xy *= nbl_glsl_VT_getVTexSzRcp();
270274

271-
return nbl_glsl_uVTextureLod_impl(formatID, virtualUV, lod, int(nbl_glsl_unpackMaxMipInVT(_texData)));
275+
return nbl_glsl_uVTextureLod_impl(formatID, virtualUV, lod, nbl_glsl_unpackMaxMipInVT(_texData));
272276
}
273277
#endif //_NBL_VT_UINT_VIEWS_COUNT
274278

@@ -314,7 +318,7 @@ retval_t funcName(in uvec2 _texData, in vec2 uv, in uint lod) \
314318
virtualUV.xy += uv * originalSz; \
315319
virtualUV.xy *= nbl_glsl_VT_getVTexSzRcp(); \
316320
\
317-
return nbl_glsl_vTextureLod_impl(formatID, virtualUV, lod, int(nbl_glsl_unpackMaxMipInVT(_texData))); \
321+
return nbl_glsl_vTextureLod_impl(formatID, virtualUV, lod, nbl_glsl_unpackMaxMipInVT(_texData)); \
318322
}
319323
320324
_NBL_DEFINE_VT_INTEGER_FUNCTIONS(nbl_glsl_iVTextureLod, nbl_glsl_iVTextureLod_impl, ivec4, iphysicalTileStorageFormatView)

src/nbl/asset/material_compiler/CMaterialCompilerGLSLBackendCommon.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ namespace material_compiler
279279
return found->second;
280280

281281
auto img = tex.image->getCreationParameters().image;
282+
img = m_ctx->vt.vt->createUpscaledImage(img.get());
282283
auto* sampler = tex.sampler.get();
283284

284285
const auto& extent = img->getCreationParameters().extent;
@@ -302,6 +303,10 @@ namespace material_compiler
302303
alloc.uwrap = uwrap;
303304
alloc.vwrap = vwrap;
304305
auto addr = m_ctx->vt.alloc(alloc, std::move(img), border);
306+
/*if (alloc.extent.width == 64u && alloc.extent.height == 64u)
307+
{
308+
printf("allocated 64x64: %u, %u, %u, maxmip=%u\n", (uint32_t)addr.pgTab_x, (uint32_t)addr.pgTab_y, (uint32_t)addr.pgTab_layer, (uint32_t) addr.maxMip);
309+
}*/
305310

306311
std::pair<SContext::VTallocKey, instr_stream::VTID> item{{tex.image.get(),tex.sampler.get()}, addr};
307312
m_ctx->VTallocMap.insert(item);

0 commit comments

Comments
 (0)