Skip to content

Commit facf68f

Browse files
committed
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla-Examples-and-Tests into vulkan_1_3_example_03_fix
2 parents 44ce6b1 + 870e1d5 commit facf68f

File tree

20 files changed

+1004
-676
lines changed

20 files changed

+1004
-676
lines changed

02_HelloCompute/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ class HelloComputeApp final : public nbl::application_templates::MonoSystemMonoL
120120

121121
// Yes we know workgroup sizes can come from specialization constants, however DXC has a problem with that https://github.com/microsoft/DirectXShaderCompiler/issues/3092
122122
const string WorkgroupSizeAsStr = std::to_string(WorkgroupSize);
123-
const IShaderCompiler::SPreprocessorOptions::SMacroDefinition WorkgroupSizeDefine = {"WORKGROUP_SIZE",WorkgroupSizeAsStr};
123+
const IShaderCompiler::SMacroDefinition WorkgroupSizeDefine = {"WORKGROUP_SIZE",WorkgroupSizeAsStr};
124124

125125
CHLSLCompiler::SOptions options = {};
126126
// really we should set it to `ESS_COMPUTE` since we know, but we'll test the `#pragma` handling fur teh lulz

07_StagingAndMultipleQueues/app_resources/comp_shader.hlsl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22

33
#include "../app_resources/common.hlsl"
44

5-
[[vk::combinedImageSampler]][[vk::binding(0,0)]] Texture2D texture;
6-
[[vk::combinedImageSampler]][[vk::binding(0,0)]] SamplerState samplerState;
5+
[[vk::binding(0,0)]] Texture2D texture;
76
[[vk::binding(1,0)]] RWStructuredBuffer<uint32_t> histogram;
87

98
[[vk::push_constant]]
@@ -18,7 +17,7 @@ void main(uint32_t3 ID : SV_DispatchThreadID)
1817
if(ID.x >= width || ID.y >= height)
1918
return;
2019

21-
const float32_t4 texel = texture.SampleLevel(samplerState, ID.xy, 0.0);
20+
const float32_t4 texel = texture.Load(int32_t3(ID.xy,/*miplevel*/0));
2221

2322
const uint32_t redVal = uint32_t(texel.r * 255.f + 0.5f);
2423
const uint32_t greenVal = uint32_t(texel.g * 255.f + 0.5f);

07_StagingAndMultipleQueues/main.cpp

Lines changed: 85 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
2121
using device_base_t = application_templates::BasicMultiQueueApplication;
2222
using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication;
2323

24+
// TODO: would be cool if we used `system::ISystem::listItemsInDirectory(sharedInputCWD/"GLI")` as our dataset
2425
static constexpr std::array imagesToLoad = {
2526
"../app_resources/test0.png",
2627
"../app_resources/test1.png",
@@ -54,15 +55,17 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
5455
m_histogramSavedSemaphore = m_device->createSemaphore(TIMELINE_SEMAPHORE_STARTING_VALUE);
5556

5657
// TODO: create/initialize array of atomic pointers to IGPUImage* and IGPUBuffer* to hold results
57-
// no need i think
5858

59+
// TODO: Change the capture start/end to become methods of IAPIConnection, because our current API is not how renderdoc works
60+
getComputeQueue()->startCapture();
5961
std::thread loadImagesThread(&StagingAndMultipleQueuesApp::loadImages, this);
6062
std::thread saveHistogramsThread(&StagingAndMultipleQueuesApp::saveHistograms, this);
6163

6264
calculateHistograms();
6365

6466
loadImagesThread.join();
6567
saveHistogramsThread.join();
68+
getComputeQueue()->endCapture();
6669

6770
return true;
6871
}
@@ -89,8 +92,8 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
8992

9093
private:
9194
smart_refctd_ptr<ISemaphore> m_imagesLoadedSemaphore, m_imagesProcessedSemaphore, m_histogramSavedSemaphore;
92-
std::atomic<uint32_t> m_imagesLoadedCnt, m_imagesProcessedCnt, m_imagesDownloadedCnt, m_imagesSavedCnt;
9395
std::atomic<uint32_t> imageHandlesCreated = 0u;
96+
std::atomic<uint32_t> transfersSubmitted = 0u;
9497
std::array<core::smart_refctd_ptr<IGPUImage>, IMAGE_CNT> images;
9598

9699
static constexpr uint32_t FRAMES_IN_FLIGHT = 3u;
@@ -104,34 +107,37 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
104107

105108
void loadImages()
106109
{
110+
const core::set<uint32_t> uniqueFamilyIndices = { getTransferUpQueue()->getFamilyIndex(), getComputeQueue()->getFamilyIndex() };
111+
const std::vector<uint32_t> familyIndices(uniqueFamilyIndices.begin(),uniqueFamilyIndices.end());
112+
const bool multipleQueueFamilies = familyIndices.size()>1;
113+
107114
IAssetLoader::SAssetLoadParams lp;
108115
lp.logger = m_logger.get();
109116

110117
auto transferUpQueue = getTransferUpQueue();
111-
const core::bitflag<IGPUCommandPool::CREATE_FLAGS> commandPoolFlags = IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT;
112118
std::array<core::smart_refctd_ptr<nbl::video::IGPUCommandPool>, FRAMES_IN_FLIGHT> commandPools;
113119
std::array<core::smart_refctd_ptr<nbl::video::IGPUCommandBuffer>, FRAMES_IN_FLIGHT> commandBuffers;
114120
std::fill(commandPools.begin(), commandPools.end(), nullptr);
115121

116122
core::smart_refctd_ptr<ICPUImage> cpuImages[IMAGE_CNT];
117123
for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; ++i)
118124
{
125+
const core::bitflag<IGPUCommandPool::CREATE_FLAGS> commandPoolFlags = IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT;
119126
commandPools[i] = m_device->createCommandPool(transferUpQueue->getFamilyIndex(), commandPoolFlags);
120127
commandPools[i]->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, {commandBuffers.data() + i, 1}, core::smart_refctd_ptr(m_logger));
128+
commandBuffers[i]->setObjectDebugName(("Upload Command Buffer #"+std::to_string(i)).c_str());
121129
}
122130

123131
core::smart_refctd_ptr<ISemaphore> imgFillSemaphore = m_device->createSemaphore(0);
124-
IQueue::SSubmitInfo::SSemaphoreInfo imgFillSemaphoreInfo[] =
125-
{
126-
{
132+
imgFillSemaphore->setObjectDebugName("Image Fill Semaphore");
133+
SIntendedSubmitInfo intendedSubmit = {
134+
.queue = transferUpQueue,
135+
.waitSemaphores = {},
136+
.commandBuffers = {}, // fill later
137+
.scratchSemaphore = {
127138
.semaphore = imgFillSemaphore.get(),
128-
.value = 1,
139+
.value = 0,
129140
.stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS
130-
},
131-
{
132-
.semaphore = m_imagesLoadedSemaphore.get(),
133-
.value = 0xdeadbeef,
134-
.stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS
135141
}
136142
};
137143

@@ -162,13 +168,15 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
162168
imgParams.arrayLayers = 1u;
163169
imgParams.samples = IImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT;
164170
imgParams.usage = asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT;
165-
// constexpr uint32_t FAMILY_INDICES_CNT = 3; // TODO: test on intel integrated GPU (which allows only one queue family)
166-
std::array familyIndices = { getTransferUpQueue()->getFamilyIndex(), getComputeQueue()->getFamilyIndex() };
167-
imgParams.queueFamilyIndexCount = familyIndices.size();
168-
imgParams.queueFamilyIndices = familyIndices.data();
171+
if (multipleQueueFamilies)
172+
{
173+
imgParams.queueFamilyIndexCount = familyIndices.size();
174+
imgParams.queueFamilyIndices = familyIndices.data();
175+
}
169176
imgParams.preinitialized = false;
170177

171178
images[imageIdx] = m_device->createImage(std::move(imgParams));
179+
images[imageIdx]->setObjectDebugName(("Image #"+std::to_string(imageIdx)).c_str());
172180
auto imageAllocation = m_device->allocate(images[imageIdx]->getMemoryReqs(), images[imageIdx].get(), IDeviceMemoryAllocation::EMAF_NONE);
173181
imageHandlesCreated++;
174182
imageHandlesCreated.notify_one();
@@ -206,13 +214,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
206214
}
207215

208216
IQueue::SSubmitInfo::SCommandBufferInfo imgFillCmdBuffInfo = { cmdBuff.get() };
209-
210-
imgFillSemaphoreInfo[1].value = imageIdx + 1u;
211-
212-
213-
SIntendedSubmitInfo intendedSubmit = {
214-
.frontHalf = {.queue = transferUpQueue, .waitSemaphores = {}, .commandBuffers = {&imgFillCmdBuffInfo, 1}}, .signalSemaphores = imgFillSemaphoreInfo
215-
};
217+
intendedSubmit.commandBuffers = {&imgFillCmdBuffInfo,1};
216218

217219
cmdBuff->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
218220

@@ -221,18 +223,17 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
221223
if (!cmdBuff->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, pplnBarrierDepInfo0))
222224
logFailAndTerminate("Failed to issue barrier!\n");
223225

224-
225-
transferUpQueue->startCapture();
226-
uint64_t oldCntr = imgFillSemaphoreInfo[0].value;
226+
const uint64_t oldCntr = intendedSubmit.scratchSemaphore.value;
227227
const bool uploadCommendRecorded = m_utils->updateImageViaStagingBuffer(
228228
intendedSubmit, cpuImages[imageIdx]->getBuffer(), cpuImages[imageIdx]->getCreationParameters().format,
229229
images[imageIdx].get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, cpuImages[imageIdx]->getRegions()
230230
);
231231
if (!uploadCommendRecorded)
232232
logFailAndTerminate("Couldn't update image data.\n");
233233

234-
if(imgFillSemaphoreInfo[0].value != oldCntr)
235-
m_logger->log("%d overflows when uploading image %d!\n", ILogger::ELL_PERFORMANCE, imgFillSemaphoreInfo[0].value - oldCntr, imageIdx);
234+
const auto newCntr = intendedSubmit.scratchSemaphore.value;
235+
if (newCntr!=oldCntr)
236+
m_logger->log("%d overflows when uploading image %d!\n", ILogger::ELL_PERFORMANCE, newCntr-oldCntr, imageIdx);
236237

237238
IGPUCommandBuffer::SPipelineBarrierDependencyInfo pplnBarrierDepInfo1;
238239
pplnBarrierDepInfo1.imgBarriers = { &imageLayoutTransitionBarrier1, 1 };
@@ -242,10 +243,15 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
242243

243244
cmdBuff->end();
244245

245-
intendedSubmit.advanceScratchSemaphoreValue();
246-
IQueue::SSubmitInfo submitInfo[1] = { intendedSubmit };
247-
getTransferUpQueue()->submit(submitInfo);
248-
transferUpQueue->endCapture();
246+
const IQueue::SSubmitInfo::SSemaphoreInfo signalSemaphore = {
247+
.semaphore=m_imagesLoadedSemaphore.get(),
248+
.value=imageIdx+1u,
249+
// cannot signal from COPY stage because there's a layout transition we need to wait for right after and it doesn't have an explicit stage
250+
.stageMask=PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS
251+
};
252+
getTransferUpQueue()->submit(intendedSubmit.popSubmit({&signalSemaphore,1}));
253+
transfersSubmitted++;
254+
transfersSubmitted.notify_one();
249255

250256

251257
// TODO: this is for basic testing purposes, will be deleted ofc
@@ -259,44 +265,48 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
259265
{
260266
// INITIALIZE COMMON DATA
261267
auto computeQueue = getComputeQueue();
262-
const core::bitflag<IGPUCommandPool::CREATE_FLAGS> commandPoolFlags = static_cast<IGPUCommandPool::CREATE_FLAGS>(IGPUCommandPool::CREATE_FLAGS::NONE);
263-
std::array<core::smart_refctd_ptr<nbl::video::IGPUCommandPool>, FRAMES_IN_FLIGHT> commandPools;
264-
std::array<core::smart_refctd_ptr<nbl::video::IGPUCommandBuffer>, FRAMES_IN_FLIGHT> commandBuffers;
268+
269+
smart_refctd_ptr<IGPUDescriptorSetLayout> dsLayout;
265270
core::smart_refctd_ptr<IGPUDescriptorSet> descSets[FRAMES_IN_FLIGHT];
266-
std::fill(commandPools.begin(), commandPools.end(), nullptr);
267-
nbl::video::IGPUDescriptorSetLayout::SBinding bindings[2] = {
268-
{
269-
.binding = 0,
270-
.type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER,
271-
.createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
272-
.stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,
273-
.count = 1,
274-
.samplers = nullptr
275-
},
271+
{
272+
nbl::video::IGPUDescriptorSetLayout::SBinding bindings[2] = {
273+
{
274+
.binding = 0,
275+
.type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, // TODO: just an image descriptor type when separable samplers arrive
276+
.createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
277+
.stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,
278+
.count = 1,
279+
.samplers = nullptr
280+
},
281+
{
282+
.binding = 1,
283+
.type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
284+
.createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
285+
.stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,
286+
.count = 1,
287+
.samplers = nullptr
288+
}
289+
};
290+
291+
dsLayout = m_device->createDescriptorSetLayout(bindings);
292+
if (!dsLayout)
293+
logFailAndTerminate("Failed to create a Descriptor Layout!\n");
294+
auto descPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }, &FRAMES_IN_FLIGHT);
295+
for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; ++i)
276296
{
277-
.binding = 1,
278-
.type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER,
279-
.createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,
280-
.stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,
281-
.count = 1,
282-
.samplers = nullptr
297+
descSets[i] = descPool->createDescriptorSet(core::smart_refctd_ptr(dsLayout));
298+
descSets[i]->setObjectDebugName(("Descriptor Set #" + std::to_string(i)).c_str());
283299
}
284-
};
285-
smart_refctd_ptr<IGPUDescriptorSetLayout> dsLayout[1] = { m_device->createDescriptorSetLayout(bindings) };
286-
if (!dsLayout[0])
287-
logFailAndTerminate("Failed to create a Descriptor Layout!\n");
288-
smart_refctd_ptr<nbl::video::IDescriptorPool> descPools[FRAMES_IN_FLIGHT] = { // TODO: only one desc pool?
289-
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout[0].get(), 1}),
290-
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout[0].get(), 1}),
291-
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, {&dsLayout[0].get(), 1})
292-
};
300+
}
293301

302+
std::array<core::smart_refctd_ptr<nbl::video::IGPUCommandPool>, FRAMES_IN_FLIGHT> commandPools;
303+
std::array<core::smart_refctd_ptr<nbl::video::IGPUCommandBuffer>, FRAMES_IN_FLIGHT> commandBuffers;
294304
for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; ++i)
295305
{
306+
const core::bitflag<IGPUCommandPool::CREATE_FLAGS> commandPoolFlags = IGPUCommandPool::CREATE_FLAGS::NONE;
296307
commandPools[i] = m_device->createCommandPool(getComputeQueue()->getFamilyIndex(), commandPoolFlags);
297308
commandPools[i]->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, {commandBuffers.data() + i, 1}, core::smart_refctd_ptr(m_logger));
298-
299-
descSets[i] = descPools[i]->createDescriptorSet(core::smart_refctd_ptr(dsLayout[0]));
309+
commandBuffers[i]->setObjectDebugName(("Histogram Command Buffer #" + std::to_string(i)).c_str());
300310
}
301311

302312
// LOAD SHADER FROM FILE
@@ -320,7 +330,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
320330
pc[0].size = sizeof(PushConstants);
321331

322332
smart_refctd_ptr<nbl::video::IGPUComputePipeline> pipeline;
323-
smart_refctd_ptr<IGPUPipelineLayout> pplnLayout = m_device->createPipelineLayout(pc, smart_refctd_ptr(dsLayout[0]));
333+
smart_refctd_ptr<IGPUPipelineLayout> pplnLayout = m_device->createPipelineLayout(pc,std::move(dsLayout));
324334
{
325335
// Nabla actually has facilities for SPIR-V Reflection and "guessing" pipeline layouts for a given SPIR-V which we'll cover in a different example
326336
if (!pplnLayout)
@@ -369,6 +379,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
369379
m_histogramBufferMemPtrs[2] = m_histogramBufferMemPtrs[1] + HISTOGRAM_SIZE;
370380
}
371381

382+
// TODO: will no longer be necessary after separable samplers and images
372383
IGPUSampler::SParams samplerParams;
373384
samplerParams.AnisotropicFilter = false;
374385
core::smart_refctd_ptr<IGPUSampler> sampler = m_device->createSampler(samplerParams);
@@ -409,17 +420,18 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
409420
params.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT;
410421
params.subresourceRange.layerCount = images[imageToProcessId]->getCreationParameters().arrayLayers;
411422

412-
imgInfo.desc = m_device->createImageView(std::move(params));
413-
if (!imgInfo.desc)
423+
auto view = m_device->createImageView(std::move(params));
424+
if (!view)
414425
logFailAndTerminate("Couldn't create descriptor.");
426+
view->setObjectDebugName(("Image View #"+std::to_string(imageToProcessId)).c_str());
427+
imgInfo.desc = std::move(view);
415428
imgInfo.info.image = { .sampler = sampler, .imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL };
416429

417430
IGPUDescriptorSet::SWriteDescriptorSet write[1] = {
418431
{.dstSet = descSets[resourceIdx].get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &imgInfo }
419432
};
420433
m_device->updateDescriptorSets(1, write, 0u, nullptr);
421434

422-
computeQueue->startCapture();
423435
cmdBuff->begin(IGPUCommandBuffer::USAGE::NONE);
424436
cmdBuff->beginDebugMarker("My Compute Dispatch", core::vectorSIMDf(0, 1, 0, 1));
425437
cmdBuff->bindComputePipeline(pipeline.get());
@@ -450,8 +462,14 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul
450462
submitInfo[0].commandBuffers = cmdBuffSubmitInfo;
451463
submitInfo[0].signalSemaphores = signalSemaphoreSubmitInfo;
452464
submitInfo[0].waitSemaphores = {waitSemaphoreSubmitInfo, imageToProcessId < FRAMES_IN_FLIGHT ? 1u : 2u};
465+
// Some Devices like all of the Intel GPUs do not have enough queues for us to allocate different queues to compute and transfers,
466+
// so our `BasicMultiQueueApplication` will "alias" a single queue to both usages. Normally you don't need to care, but here we're
467+
// attempting to do "out-of-order" "submit-before-signal" so we need to "hold back" submissions if the queues are aliased!
468+
// TODO: Renderdoc freezes because it starts capturing immediately upon a submit and can't defer a capture till semaphores signal.
469+
if (getTransferUpQueue()==computeQueue /*|| m_api->isRunningInRenderdoc()*/)
470+
for (auto old = transfersSubmitted.load(); old <= imageToProcessId; old = transfersSubmitted.load())
471+
transfersSubmitted.wait(old);
453472
computeQueue->submit(submitInfo);
454-
computeQueue->endCapture();
455473
std::string msg = std::string("Image nr ") + std::to_string(imageToProcessId) + " processed. Resource idx: " + std::to_string(resourceIdx);
456474
m_logger->log(msg);
457475
}

21_LRUCacheUnitTest/main.cpp

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,21 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL
4141
cache.insert(11, 'd');
4242
cache.insert(12, 'e');
4343
cache.insert(13, 'f');
44-
44+
cache.print(m_logger);
45+
m_logger->log("We're Referencing `10:c`");
46+
char returned = *(cache.get(10));
47+
cache.print(m_logger);
48+
m_logger->log("We're erasing `11:d`");
49+
cache.erase(11);
50+
assert(cache.get(11) == nullptr);
51+
cache.print(m_logger);
52+
m_logger->log("We're adding `11:d` again");
53+
cache.insert(11, 'd');
4554
cache.print(m_logger);
4655

47-
char returned = *(cache.get(11));
48-
assert(returned == 'd');
49-
returned = *(cache.get(10));
5056
assert(returned == 'c');
57+
returned = *(cache.get(11));
58+
assert(returned == 'd');
5159
returned = *(cache.get(13));
5260
assert(returned == 'f');
5361

@@ -105,6 +113,44 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL
105113
#endif
106114
m_logger->log("all good");
107115

116+
constexpr uint32_t InvalidIdx = ~0u;
117+
struct TextureReference
118+
{
119+
uint32_t alloc_idx;
120+
uint64_t lastUsedSemaphoreValue;
121+
122+
// copy ctor
123+
TextureReference(const TextureReference& tref)
124+
{
125+
assert(false); // based on the code in this test, copy constructor shouldn't be called
126+
}
127+
TextureReference(TextureReference&& tref) = default;
128+
inline TextureReference& operator=(TextureReference&& tref) = default;
129+
130+
TextureReference(uint32_t alloc_idx, uint64_t semaphoreVal) : alloc_idx(alloc_idx), lastUsedSemaphoreValue(semaphoreVal) {}
131+
TextureReference(uint64_t semaphoreVal) : TextureReference(InvalidIdx, semaphoreVal) {}
132+
TextureReference() : TextureReference(InvalidIdx, ~0ull) {}
133+
134+
// In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value to TextureReference without changing `alloc_idx`
135+
inline TextureReference& operator=(uint64_t semamphoreVal) { lastUsedSemaphoreValue = semamphoreVal; return *this; }
136+
};
137+
using TextureLRUCache = LRUCache<uint32_t, TextureReference>;
138+
139+
TextureLRUCache textureCache = TextureLRUCache(3u);
140+
141+
static_assert(std::is_assignable_v<TextureReference, uint64_t>);
142+
static_assert(std::is_constructible_v<TextureReference, uint64_t>);
143+
144+
textureCache.insert(91u, TextureReference{ ~0u, 69u });
145+
textureCache.insert(92u, TextureReference{ 20u, 70u });
146+
textureCache.insert(93u, TextureReference{ 10u, 71u });
147+
auto t = textureCache.get(91u);
148+
assert(t->lastUsedSemaphoreValue == 69u); // make 91 jump to front, now 92 is the LRU
149+
// next insertion will evict because capacity is 3
150+
auto insertion = textureCache.insert(99u, 6999ull, [](const TextureReference& evictedTextureRef) -> void { assert(evictedTextureRef.alloc_idx == 20u); });
151+
assert(insertion->alloc_idx == InvalidIdx);
152+
assert(insertion->lastUsedSemaphoreValue == 6999ull);
153+
108154
return true;
109155
}
110156

0 commit comments

Comments
 (0)