|
7 | 7 | #include "nbl/core/declarations.h"
|
8 | 8 | #include "nbl/core/alloc/LinearAddressAllocator.h"
|
9 | 9 |
|
10 |
| -#include <iterator> |
11 |
| - |
12 |
| - |
13 |
| -//#include "nbl/asset/asset.h" |
14 |
| - |
15 |
| - |
16 |
| -#include "nbl/video/asset_traits.h" |
17 | 10 | #include "nbl/video/ISemaphore.h"
|
18 | 11 | #include "nbl/video/ILogicalDevice.h"
|
19 | 12 |
|
20 | 13 | #if 0
|
21 |
// Converts a range of CPU buffers into GPU buffers by packing them into as few GPU
// buffer "blocks" as possible: each asset is sub-allocated (via a linear address
// allocator) inside a block no larger than the device's maxBufferSize, then its bytes
// are uploaded through the utilities' staging buffer on the transfer queue.
// Returns one GPU object per input asset; entries for assets larger than maxBufferSize
// are skipped (left defaulted in the result array).
// NOTE(review): dead code — this whole region sits under `#if 0` and uses pre-refactor
// APIs (IGPUFence, SParams::fences); kept only as a reference for the rewrite.
auto IGPUObjectFromAssetConverter::create(const asset::ICPUBuffer** const _begin, const asset::ICPUBuffer** const _end, SParams& _params) -> created_gpu_object_array<asset::ICPUBuffer> // TODO: improve for caches of very large buffers!!!
{
	const auto assetCount = std::distance(_begin, _end);
	auto res = core::make_refctd_dynamic_array<created_gpu_object_array<asset::ICPUBuffer> >(assetCount);

	const auto& limits = _params.device->getPhysicalDevice()->getLimits();

	// Worst-case alignment for every sub-allocation, so any asset placed in a shared
	// block can later be bound as a texel-buffer view, SSBO or UBO without re-packing.
	const uint64_t alignment =
		std::max<uint64_t>(
			std::max<uint64_t>(limits.bufferViewAlignment,limits.minSSBOAlignment),
			std::max<uint64_t>(limits.minUBOAlignment, _NBL_SIMD_ALIGNMENT)
		);

	const uint64_t maxBufferSize = limits.maxBufferSize;
	auto out = res->begin();
	// `firstInBlock` marks the first output belonging to the block currently being filled.
	auto firstInBlock = out;
	// Fresh linear allocator representing a new, not-yet-created GPU buffer block;
	// null backing pointer because only offsets are allocated here, never memory.
	auto newBlock = [&]() -> auto
	{
		return core::LinearAddressAllocator<uint64_t>(nullptr, 0u, 0u, alignment, maxBufferSize);
	};
	auto addrAllctr = newBlock();

	// Fence signalled by the final transfer-queue submit; the caller owns it via _params.
	auto & fence = _params.fences[EQU_TRANSFER];
	fence = _params.device->createFence(static_cast<IGPUFence::E_CREATE_FLAGS>(0));
	core::smart_refctd_ptr<IGPUCommandBuffer> cmdbuf = _params.perQueue[EQU_TRANSFER].cmdbuf;

	IQueue::SSubmitInfo submit;
	{
		submit.commandBufferCount = 1u;
		// NOTE(review): assumes smart_refctd_ptr::get() returns a reference to the stored
		// pointer (taking its address would otherwise alias a temporary) — confirm.
		submit.commandBuffers = &cmdbuf.get();
		// CPU to GPU upload doesn't need to wait for anything or narrow down the execution barrier
		// buffer and addresses we're writing into are fresh and brand new, don't need to synchronize the writing with anything
		submit.waitSemaphoreCount = 0u;
		submit.pWaitDstStageMask = nullptr;
		submit.pWaitSemaphores = nullptr;
	}

	// Caller must hand us a command buffer already in the recording state.
	assert(cmdbuf && cmdbuf->getState() == IGPUCommandBuffer::STATE::RECORDING);
	// cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);

	// Creates one GPU buffer sized to the current block's allocations, ORs together the
	// usage flags of every CPU buffer placed in it, allocates device-local memory, and
	// records staged uploads for every non-null output in [firstInBlock, out).
	auto finalizeBlock = [&]() -> void
	{
		auto bufferSize = addrAllctr.get_allocated_size();
		// Nothing was packed into this block (e.g. called before any allocation) — no-op.
		if (bufferSize==0u)
			return;

		IGPUBuffer::SCreationParams bufparams = {};
		bufparams.size = bufferSize;
		bufparams.usage = core::bitflag(IGPUBuffer::EUF_TRANSFER_DST_BIT);

		// The shared block must support the union of all its occupants' usages.
		for (auto it = firstInBlock; it != out; it++)
		{
			// Output index within `res` equals the input index within [_begin,_end).
			auto cpubuffer = _begin[std::distance(res->begin(), it)];
			bufparams.usage |= cpubuffer->getUsageFlags();
		}

		// Share between the transfer queue family and the family that will own the
		// buffer afterwards; exclusive ownership when they are the same family.
		uint32_t qfams[2]{ _params.perQueue[EQU_TRANSFER].queue->getFamilyIndex(), _params.finalQueueFamIx };
		bufparams.queueFamilyIndices = qfams;
		bufparams.queueFamilyIndexCount = (qfams[0] == qfams[1]) ? 0u : 2u;

		// Device-address capable memory is only requested when some occupant needs BDA.
		core::bitflag<IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS> allocateFlags(IDeviceMemoryAllocation::EMAF_NONE);
		if(bufparams.usage.hasFlags(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT))
			allocateFlags |= IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT;

		auto gpubuffer = _params.device->createBuffer(std::move(bufparams));
		auto gpubufferMemReqs = gpubuffer->getMemoryReqs();
		// Restrict to device-local heaps; uploads go through the staging buffer anyway.
		gpubufferMemReqs.memoryTypeBits &= _params.device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits();
		auto gpubufferMem = _params.device->allocate(gpubufferMemReqs, gpubuffer.get(), allocateFlags);

		for (auto it = firstInBlock; it != out; it++)
		{
			// Skipped (oversized) assets left their output slot null — don't upload those.
			if (auto output = *it)
			{
				auto cpubuffer = _begin[std::distance(res->begin(), it)];
				asset::SBufferRange<IGPUBuffer> bufrng;
				// Offset was pre-allocated in the per-block linear allocator below.
				bufrng.offset = output->getOffset();
				bufrng.size = cpubuffer->getSize();
				bufrng.buffer = gpubuffer;
				output->setBuffer(core::smart_refctd_ptr(gpubuffer));
				// May flush/submit internally when staging memory runs out, hence the
				// submit info is threaded through and reassigned each call.
				submit = _params.utilities->updateBufferRangeViaStagingBuffer(
					bufrng,cpubuffer->getPointer(),
					_params.perQueue[EQU_TRANSFER].queue, fence.get(), submit
				);
			}
		}
	};
	// First pass: assign every asset an offset within some block, finalizing and
	// starting a new block whenever the current one cannot fit the next asset.
	for (auto it=_begin; it!=_end; it++,out++)
	{
		auto cpubuffer = *it;
		// Asset can never fit in a single GPU buffer — leave its output slot defaulted.
		if (cpubuffer->getSize()>maxBufferSize)
			continue;

		uint64_t addr = addrAllctr.alloc_addr(cpubuffer->getSize(),alignment);
		if (addr==decltype(addrAllctr)::invalid_address)
		{
			// Current block is full: create+upload it, then retry in a fresh block.
			finalizeBlock();
			firstInBlock = out;
			addrAllctr = newBlock();
			addr = addrAllctr.alloc_addr(cpubuffer->getSize(),alignment);
		}
		// Must succeed now: the asset fits in an empty block (size <= maxBufferSize).
		assert(addr != decltype(addrAllctr)::invalid_address);
		// Output initially only carries its offset; the backing buffer is attached in finalizeBlock().
		*out = core::make_smart_refctd_ptr<typename asset_traits<asset::ICPUBuffer>::GPUObjectType>(addr);
	}
	// Flush the final (possibly partial) block.
	finalizeBlock();

	// TODO: submit outside of `create` and make the function take an already created semaphore to signal
	cmdbuf->end();
	core::smart_refctd_ptr<IGPUSemaphore> sem;
	// Only signal a semaphore if the caller asked for one via the perQueue slot.
	if (_params.perQueue[EQU_TRANSFER].semaphore)
	{
		sem = _params.device->createSemaphore();
		submit.signalSemaphoreCount = 1u;
		submit.pSignalSemaphores = &sem.get();
	}
	else
	{
		submit.signalSemaphoreCount = 0u;
		submit.pSignalSemaphores = nullptr;
	}
	// transfer_fence needs to be signalled because of the streaming buffer uploads need to be fenced
	_params.perQueue[EQU_TRANSFER].queue->submit(1u,&submit,fence.get());
	// Hand the freshly created signal semaphore back to the caller.
	if (_params.perQueue[EQU_TRANSFER].semaphore)
		_params.perQueue[EQU_TRANSFER].semaphore[0] = std::move(sem);

	return res;
}
147 |
| - |
148 |
| - |
149 | 14 | // TODO: rewrite after GPU polyphase implementation
|
150 | 15 | auto IGPUObjectFromAssetConverter::create(const asset::ICPUImage** const _begin, const asset::ICPUImage** const _end, SParams& _params) -> created_gpu_object_array<asset::ICPUImage>
|
151 | 16 | {
|
|
0 commit comments