Skip to content

Commit b43ebab

Browse files
devshkeptsecret
authored and committed
Added support for allocating converted buffers in HOST_VISIBLE memory, so when ReBAR is available one can skip doing any data manipulation in `convert`.
P.S. Watch and learn, because that's how we'll leverage `VK_EXT_host_image_copy`.
1 parent b20565d commit b43ebab

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,13 @@ class CAssetConverter : public core::IReferenceCounted
826826
return {};
827827
}
828828

829+
// If you absolutely need to avoid some memory type for your image or buffer, you can specify a mask here
830+
// one example would be to use HOST_VISIBLE to make sure your buffer can be written directly and doesn't need to go through staging
831+
virtual inline uint32_t constrainMemoryTypeBits(const size_t groupCopyID, const asset::IAsset* canonicalAsset, const core::blake3_hash_t& contentHash, const IDeviceMemoryBacked* memoryBacked) const
832+
{
833+
return ~0u;
834+
}
835+
829836
// most plain PNG, JPG, etc. loaders don't produce images with mip chains/tails
830837
virtual inline uint8_t getMipLevelCount(const size_t groupCopyID, const asset::ICPUImage* image, const patch_t<asset::ICPUImage>& patch) const
831838
{

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2018,8 +2018,12 @@ class MetaDeviceMemoryAllocator final
20182018
{
20192019
auto* gpuObj = pGpuObj->get();
20202020
const IDeviceMemoryBacked::SDeviceMemoryRequirements& memReqs = gpuObj->getMemoryReqs();
2021-
// this shouldn't be possible
2022-
assert(memReqs.memoryTypeBits&memoryTypeConstraint);
2021+
// overconstrained
2022+
if ((memReqs.memoryTypeBits&memoryTypeConstraint)==0)
2023+
{
2024+
m_logger.log("Overconstrained the Memory Type Index bitmask %d with %d for %s",system::ILogger::ELL_ERROR,memReqs.memoryTypeBits,memoryTypeConstraint,gpuObj->getObjectDebugName());
2025+
return false;
2026+
}
20232027
//
20242028
bool needsDeviceAddress = false;
20252029
if constexpr (std::is_same_v<std::remove_pointer_t<decltype(gpuObj)>,IGPUBuffer>)
@@ -3323,7 +3327,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
33233327
// record if a device memory allocation will be needed
33243328
if constexpr (std::is_base_of_v<IDeviceMemoryBacked,typename asset_traits<AssetType>::video_t>)
33253329
{
3326-
if (!deferredAllocator.request(&created.gpuObj))
3330+
const auto constrainMask = inputs.constrainMemoryTypeBits(uniqueCopyGroupID,instance.asset,contentHash,created.gpuObj.get());
3331+
if (!deferredAllocator.request(&created.gpuObj,constrainMask))
33273332
{
33283333
created.gpuObj.value = nullptr;
33293334
return;
@@ -3352,6 +3357,32 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
33523357
dedupCreateProp.operator()<ICPUImage>();
33533358
// now allocate the memory for buffers and images
33543359
deferredAllocator.finalize();
3360+
3361+
// can remove buffers from conversion requests which can be written to directly
3362+
{
3363+
core::vector<ILogicalDevice::MappedMemoryRange> flushRanges;
3364+
flushRanges.reserve(retval.m_bufferConversions.size());
3365+
std::erase_if(retval.m_bufferConversions,[&flushRanges](const SReserveResult::SConvReqBuffer& conv)->bool
3366+
{
3367+
const auto boundMemory = conv.gpuObj->getBoundMemory();
3368+
auto* const memory = boundMemory.memory;
3369+
if (!boundMemory.memory->isMappable())
3370+
return false;
3371+
const size_t size = conv.gpuObj->getSize();
3372+
const IDeviceMemoryAllocation::MemoryRange range = {boundMemory.offset,size};
3373+
// slightly inefficient but oh well
3374+
void* dst = memory->map(range,IDeviceMemoryAllocation::EMCAF_WRITE);
3375+
memcpy(dst,conv.canonical->getPointer(),size);
3376+
if (boundMemory.memory->haveToMakeVisible())
3377+
flushRanges.emplace_back(memory,range.offset,range.length,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
3378+
return true;
3379+
}
3380+
);
3381+
if (!flushRanges.empty())
3382+
device->flushMappedMemoryRanges(flushRanges);
3383+
}
3384+
3385+
33553386
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
33563387
// Deal with Deferred Creation of Acceleration structures
33573388
{

0 commit comments

Comments
 (0)