@@ -86,8 +86,79 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
86
86
}
87
87
};
88
88
89
- static auto createPoTPaddedSquareImageWithMipLevels (const ICPUImage* _img, ISampler::E_TEXTURE_CLAMP _wrapu, ISampler::E_TEXTURE_CLAMP _wrapv, ISampler::E_TEXTURE_BORDER_COLOR _borderColor)
89
+ // ! If there's a need, creates an image upscaled to half page size
90
+ // ! Otherwise returns `_img`
91
+ // ! Always call this before alloc()
92
+ core::smart_refctd_ptr<asset::ICPUImage> createUpscaledImage (const ICPUImage* _img)
90
93
{
94
+ if (!_img)
95
+ return nullptr ;
96
+
97
+ const auto & params = _img->getCreationParameters ();
98
+ const uint32_t halfPage = m_pgSzxy / 2u ;
99
+
100
+ if (params.extent .width >= halfPage || params.extent .height >= halfPage)
101
+ {
102
+ asset::ICPUImage* img = const_cast <asset::ICPUImage*>(_img);
103
+ return core::smart_refctd_ptr<asset::ICPUImage>(img);
104
+ }
105
+
106
+ const uint32_t min_extent = std::min (params.extent .width , params.extent .height );
107
+ const float upscale_factor = static_cast <float >(halfPage) / static_cast <float >(min_extent);
108
+
109
+ asset::VkExtent3D extent_upscaled;
110
+ extent_upscaled.depth = 1u ;
111
+ extent_upscaled.width = static_cast <uint32_t >(params.extent .width * upscale_factor + 0 .5f );
112
+ extent_upscaled.height = static_cast <uint32_t >(params.extent .height * upscale_factor + 0 .5f );
113
+
114
+ asset::ICPUImage::SCreationParams new_params = params;
115
+ new_params.extent = extent_upscaled;
116
+ new_params.mipLevels = 1u ;
117
+
118
+ auto upscaled_img = asset::ICPUImage::create (std::move (new_params));
119
+ const size_t bufsz = upscaled_img->getImageDataSizeInBytes ();
120
+ auto buf = core::make_smart_refctd_ptr<asset::ICPUBuffer>(bufsz);
121
+ auto regions = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<asset::IImage::SBufferCopy>>(1u );
122
+ auto & region = regions->operator [](0u );
123
+ region.bufferOffset = 0u ;
124
+ region.bufferRowLength = extent_upscaled.width ;
125
+ region.bufferImageHeight = 0u ;
126
+ region.imageOffset = { 0 ,0 ,0 };
127
+ region.imageExtent = extent_upscaled;
128
+ region.imageSubresource .baseArrayLayer = 0u ;
129
+ region.imageSubresource .layerCount = 1u ;
130
+ region.imageSubresource .mipLevel = 0u ;
131
+ region.imageSubresource .aspectMask = _img->getRegion (0u , core::vectorSIMDu32 (0u , 0u , 0u , 0u ))->imageSubresource .aspectMask ;
132
+
133
+ upscaled_img->setBufferAndRegions (std::move (buf), std::move (regions));
134
+
135
+ using blit_filter_t = asset::CBlitImageFilter<false , false , asset::VoidSwizzle, asset::IdentityDither, asset::CMitchellImageFilterKernel<>>;
136
+ blit_filter_t ::state_type blit;
137
+ blit.inOffsetBaseLayer = core::vectorSIMDu32 (0u , 0u , 0u , 0u );
138
+ blit.inExtent = params.extent ;
139
+ blit.inLayerCount = 1u ;
140
+ blit.outOffsetBaseLayer = core::vectorSIMDu32 (0u , 0u , 0u , 0u );
141
+ blit.outExtent = extent_upscaled;
142
+ blit.outLayerCount = 1u ;
143
+ blit.inImage = const_cast <asset::ICPUImage*>(_img);
144
+ blit.outImage = upscaled_img.get ();
145
+ blit.scratchMemoryByteSize = blit_filter_t::getRequiredScratchByteSize (&blit);
146
+ blit.scratchMemory = reinterpret_cast <uint8_t *>(_NBL_ALIGNED_MALLOC (blit.scratchMemoryByteSize , _NBL_SIMD_ALIGNMENT));
147
+
148
+ const bool blit_succeeded = blit_filter_t::execute (&blit);
149
+ _NBL_ALIGNED_FREE (blit.scratchMemory );
150
+ if (!blit_succeeded)
151
+ return nullptr ;
152
+
153
+ return upscaled_img;
154
+ }
155
+
156
+ // ! Always call this before commit()
157
+ static std::pair<core::smart_refctd_ptr<asset::ICPUImage>, asset::VkExtent3D> createPoTPaddedSquareImageWithMipLevels (const ICPUImage* _img, ISampler::E_TEXTURE_CLAMP _wrapu, ISampler::E_TEXTURE_CLAMP _wrapv, ISampler::E_TEXTURE_BORDER_COLOR _borderColor)
158
+ {
159
+ if (!_img)
160
+ return { nullptr , asset::VkExtent3D{0u ,0u ,0u } };
161
+
91
162
const auto & params = _img->getCreationParameters ();
92
163
const auto originalExtent = params.extent ;
93
164
const uint32_t paddedExtent = core::roundUpToPoT (std::max<uint32_t >(params.extent .width ,params.extent .height ));
@@ -222,6 +293,19 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
222
293
uint32_t miptailPgAddr = SPhysPgOffset::invalid_addr;
223
294
224
295
using phys_pg_addr_alctr_t = ICPUVTResidentStorage::phys_pg_addr_alctr_t ;
296
+
297
+ if (levelsTakingAtLeastOnePageCount < _subres.levelCount )
298
+ {
299
+ uint32_t miptailPgAddr_tmp = phys_pg_addr_alctr_t ::invalid_address;
300
+ const uint32_t szAndAlignment = 1u ;
301
+ core::address_allocator_traits<phys_pg_addr_alctr_t >::multi_alloc_addr (storage->tileAlctr , 1u , &miptailPgAddr_tmp, &szAndAlignment, &szAndAlignment, nullptr );
302
+ miptailPgAddr_tmp = (miptailPgAddr_tmp == phys_pg_addr_alctr_t ::invalid_address) ? SPhysPgOffset::invalid_addr : storage->encodePageAddress (miptailPgAddr_tmp);
303
+
304
+ miptailPgAddr = miptailPgAddr_tmp;
305
+ }
306
+
307
+ const bool wholeTexGoesToMiptailPage = (levelsTakingAtLeastOnePageCount == 0u );
308
+
225
309
// TODO up to this line, it's kinda common code for CPU and GPU, refactor later
226
310
227
311
// TODO: parallelize over all 3 for loops
@@ -234,7 +318,7 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
234
318
for (uint32_t x = 0u ; x < w; ++x)
235
319
{
236
320
uint32_t physPgAddr = phys_pg_addr_alctr_t ::invalid_address;
237
- if (i>=levelsTakingAtLeastOnePageCount)
321
+ if (i>=levelsTakingAtLeastOnePageCount) // this `if` always executes in case of whole texture going into miptail page
238
322
physPgAddr = miptailPgAddr;
239
323
else
240
324
{
@@ -249,24 +333,22 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
249
333
if (i==(levelsTakingAtLeastOnePageCount-1u ) && levelsTakingAtLeastOnePageCount<_subres.levelCount )
250
334
{
251
335
assert (w==1u && h==1u );
252
- uint32_t miptailPgAddr_tmp = phys_pg_addr_alctr_t ::invalid_address;
253
- const uint32_t szAndAlignment = 1u ;
254
- core::address_allocator_traits<phys_pg_addr_alctr_t >::multi_alloc_addr (storage->tileAlctr , 1u , &miptailPgAddr_tmp, &szAndAlignment, &szAndAlignment, nullptr );
255
- miptailPgAddr_tmp = (miptailPgAddr_tmp==phys_pg_addr_alctr_t ::invalid_address) ? SPhysPgOffset::invalid_addr : storage->encodePageAddress (miptailPgAddr_tmp);
256
-
257
- physPgAddr |= (miptailPgAddr_tmp<<SPhysPgOffset::PAGE_ADDR_BITLENGTH);
258
-
259
- miptailPgAddr = miptailPgAddr_tmp;
336
+
337
+ physPgAddr |= (miptailPgAddr<<SPhysPgOffset::PAGE_ADDR_BITLENGTH);
260
338
}
261
- else
339
+ else // this `else` always executes in case of whole texture going into miptail page
262
340
physPgAddr |= (SPhysPgOffset::invalid_addr<<SPhysPgOffset::PAGE_ADDR_BITLENGTH);
341
+
263
342
if (i < levelsTakingAtLeastOnePageCount)
264
343
{
344
+ // physical double-address to write into page table
345
+ const uint32_t physAddrToWrite = physPgAddr;
346
+
265
347
const auto texelPos = core::vectorSIMDu32 (pgtOffset.x >>i, pgtOffset.y >>i, 0u , pgtOffset.z ) + core::vectorSIMDu32 (x, y, 0u , 0u );
266
348
const auto * region = m_pageTable->getRegion (i, texelPos);
267
349
const uint64_t byteoffset = region->getByteOffset (texelPos, region->getByteStrides (m_pageTable->getTexelBlockInfo ()));
268
350
uint8_t * bufptr = reinterpret_cast <uint8_t *>(m_pageTable->getBuffer ()->getPointer ()) + byteoffset;
269
- reinterpret_cast <uint32_t *>(bufptr)[0 ] = physPgAddr ;
351
+ reinterpret_cast <uint32_t *>(bufptr)[0 ] = physAddrToWrite ;
270
352
}
271
353
272
354
if (!SPhysPgOffset (physPgAddr).valid ())
@@ -319,6 +401,18 @@ class ICPUVirtualTexture final : public IVirtualTexture<ICPUImageView, ICPUSampl
319
401
}
320
402
}
321
403
404
+ if (wholeTexGoesToMiptailPage)
405
+ {
406
+ // physical double-address to write into page table
407
+ uint32_t physAddrToWrite = SPhysPgOffset::invalid_addr | (miptailPgAddr << SPhysPgOffset::PAGE_ADDR_BITLENGTH);
408
+
409
+ const auto texelPos = core::vectorSIMDu32 (pgtOffset.x , pgtOffset.y , 0u , pgtOffset.z );
410
+ const auto * region = m_pageTable->getRegion (0u , texelPos);
411
+ const uint64_t byteoffset = region->getByteOffset (texelPos, region->getByteStrides (m_pageTable->getTexelBlockInfo ()));
412
+ uint8_t * bufptr = reinterpret_cast <uint8_t *>(m_pageTable->getBuffer ()->getPointer ()) + byteoffset;
413
+ reinterpret_cast <uint32_t *>(bufptr)[0 ] = physAddrToWrite;
414
+ }
415
+
322
416
return true ;
323
417
}
324
418
0 commit comments