@@ -2018,8 +2018,12 @@ class MetaDeviceMemoryAllocator final
 			{
 				auto* gpuObj = pGpuObj->get();
 				const IDeviceMemoryBacked::SDeviceMemoryRequirements& memReqs = gpuObj->getMemoryReqs();
-				// this shouldn't be possible
-				assert(memReqs.memoryTypeBits&memoryTypeConstraint);
+				// overconstrained
+				if ((memReqs.memoryTypeBits&memoryTypeConstraint)==0)
+				{
+					m_logger.log("Overconstrained the Memory Type Index bitmask %d with %d for %s",system::ILogger::ELL_ERROR,memReqs.memoryTypeBits,memoryTypeConstraint,gpuObj->getObjectDebugName());
+					return false;
+				}
 				//
 				bool needsDeviceAddress = false;
 				if constexpr (std::is_same_v<std::remove_pointer_t<decltype(gpuObj)>,IGPUBuffer>)
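
Review note on the hunk above: the hard assert becomes a recoverable, logged failure, so an impossible memory type constraint now aborts the allocation request instead of crashing release builds. The surviving mask follows Vulkan's VkMemoryRequirements::memoryTypeBits convention (bit i set means memory type index i is allowed); a minimal sketch of how a backend could then pick a type index from it (hypothetical helper, not part of this commit):

#include <bit>      // std::countr_zero
#include <cstdint>

// Hypothetical helper: picks the lowest allowed memory type index from the
// combined mask `memReqs.memoryTypeBits & memoryTypeConstraint`. Only valid
// on the success path above, where the mask is guaranteed non-zero.
inline uint32_t pickMemoryTypeIndex(const uint32_t allowedMask)
{
    return static_cast<uint32_t>(std::countr_zero(allowedMask));
}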
@@ -3323,7 +3327,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 					// record if a device memory allocation will be needed
 					if constexpr (std::is_base_of_v<IDeviceMemoryBacked,typename asset_traits<AssetType>::video_t>)
 					{
-						if (!deferredAllocator.request(&created.gpuObj))
+						const auto constrainMask = inputs.constrainMemoryTypeBits(uniqueCopyGroupID,instance.asset,contentHash,created.gpuObj.get());
+						if (!deferredAllocator.request(&created.gpuObj,constrainMask))
 						{
 							created.gpuObj.value = nullptr;
 							return;
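
Review note on the hunk above: SInputs::constrainMemoryTypeBits is the new per-object hook whose return value is ANDed into the allocator's memory type mask (hence the overconstrained check in the first hunk). A minimal sketch of a caller forcing host-visible memory for every converted object; only the argument order is confirmed by this diff's call site, the parameter types here are guesses:

// Hypothetical override, assuming `constrainMemoryTypeBits` is virtual on
// `CAssetConverter::SInputs` and its parameters match the call site
// (uniqueCopyGroupID, canonical asset, content hash, created GPU object).
struct SMyInputs final : CAssetConverter::SInputs
{
    uint32_t hostVisibleMask; // precomputed from the device's memory properties

    inline uint32_t constrainMemoryTypeBits(const size_t uniqueCopyGroupID,
        const asset::IAsset* canonical, const core::blake3_hash_t& contentHash,
        const IDeviceMemoryBacked* gpuObj) const override
    {
        // returning a mask narrower than ~0u restricts where the allocator
        // may place this object; the default presumably leaves all bits set
        return hostVisibleMask;
    }
};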
@@ -3352,6 +3357,32 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 	dedupCreateProp.operator()<ICPUImage>();
 	// now allocate the memory for buffers and images
 	deferredAllocator.finalize();
+
+	// can remove buffers from conversion requests which can be written to directly
+	{
+		core::vector<ILogicalDevice::MappedMemoryRange> flushRanges;
+		flushRanges.reserve(retval.m_bufferConversions.size());
+		std::erase_if(retval.m_bufferConversions,[&flushRanges](const SReserveResult::SConvReqBuffer& conv)->bool
+			{
+				const auto boundMemory = conv.gpuObj->getBoundMemory();
+				auto* const memory = boundMemory.memory;
+				if (!memory->isMappable())
+					return false;
+				const size_t size = conv.gpuObj->getSize();
+				const IDeviceMemoryAllocation::MemoryRange range = {boundMemory.offset,size};
+				// slightly inefficient but oh well
+				void* dst = memory->map(range,IDeviceMemoryAllocation::EMCAF_WRITE);
+				memcpy(dst,conv.canonical->getPointer(),size);
+				if (memory->haveToMakeVisible())
+					flushRanges.emplace_back(memory,range.offset,range.length,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
+				return true;
+			}
+		);
+		if (!flushRanges.empty())
+			device->flushMappedMemoryRanges(flushRanges);
+	}
+
+
 #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
 	// Deal with Deferred Creation of Acceleration structures
 	{
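
Review note on the block added above: buffers bound to mappable memory are filled directly on the host (map, memcpy, flush if the memory is not coherent) and dropped from the staging conversion queue, skipping a GPU copy entirely. For reference, a minimal raw-Vulkan sketch of the same rule, assuming the `align_non_coherent_tag` overload rounds the flush range out to nonCoherentAtomSize (assumed a power of two here):

#include <cstring>
#include <vulkan/vulkan.h>

// Standalone sketch of the host-upload pattern the block above implements via
// Nabla's wrappers; error handling omitted, range assumed within the allocation.
void uploadHostVisible(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset,
    VkDeviceSize size, const void* srcData, bool coherent, VkDeviceSize atom)
{
    void* dst = nullptr;
    vkMapMemory(device, memory, offset, size, 0, &dst);
    std::memcpy(dst, srcData, size);
    if (!coherent) // no VK_MEMORY_PROPERTY_HOST_COHERENT_BIT => explicit flush
    {
        VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE};
        range.memory = memory;
        // round out to nonCoherentAtomSize, as `align_non_coherent_tag` presumably does
        range.offset = offset & ~(atom - 1);
        range.size = ((offset + size + atom - 1) & ~(atom - 1)) - range.offset;
        vkFlushMappedMemoryRanges(device, 1, &range);
    }
    vkUnmapMemory(device, memory); // the diff above keeps its memory mapped instead
}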