Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6a74ab1
Checkpoint 1 is close
Fletterio Jul 23, 2025
e523868
Off by one error fix
Fletterio Jul 24, 2025
a54f6c6
Fix tile offsets for upload
Fletterio Jul 24, 2025
c3f7d04
Skeleton done but currently bugged, some byte offset is wrong (relate…
Fletterio Jul 28, 2025
7a5e948
Fix square bytes computation
Fletterio Jul 29, 2025
52d947d
Checkpoint 1!
Fletterio Jul 30, 2025
665559b
Save before merge
Fletterio Jul 31, 2025
a24281a
Merge + incorporate obb shrinking at the edges
Fletterio Aug 7, 2025
fc2d504
Bug: using uploaded uvs seems to stretch/not shrink along v direction
Fletterio Aug 8, 2025
e61e389
Fixed y-axis bug
Fletterio Aug 8, 2025
258920c
Diagonal computation
Fletterio Aug 10, 2025
6a1b76e
Some names are wrong here, but the example still works
Fletterio Aug 13, 2025
0ed564e
Tile tracking done
Fletterio Aug 17, 2025
f638ca6
Cleaning up the code following PR review
Fletterio Aug 19, 2025
89af347
Checkpoint for Phase 2
Fletterio Aug 20, 2025
f3532fe
Addressed Erfan PR messages
Fletterio Aug 22, 2025
888bcb1
Addressed some PR comments, checkpoint before modifying UV logic
Fletterio Aug 27, 2025
f0ba40f
Another checkpoint before modifying UV logic
Fletterio Aug 29, 2025
dc322da
Checkpoint: example mip level emulated computation
Fletterio Sep 10, 2025
2be88a5
nPoT handled!
Fletterio Sep 12, 2025
8a02379
Cleanup, some precomputes
Fletterio Sep 15, 2025
7330383
Some more brief updates
Fletterio Sep 16, 2025
452bee7
Some minor refactors, added some padding to max tile comp for viewpor…
Fletterio Sep 16, 2025
932cb74
Mirrored changes on n4ce after PR review
Fletterio Sep 17, 2025
bb3c3e8
Changes following PR review, to be moved to n4ce
Fletterio Sep 18, 2025
b232c21
Add a whole texel shift
Fletterio Sep 23, 2025
72d7930
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla-…
AnastaZIuk Oct 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 168 additions & 24 deletions 62_CAD/DrawResourcesFiller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -631,20 +631,19 @@ bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::span<Stati
return true;
}

bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit)
bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(StreamedImageManager& manager, SIntendedSubmitInfo& intendedNextSubmit)
{
auto* device = m_utilities->getLogicalDevice();
auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice();

// Try inserting or updating the image usage in the cache.
// If the image is already present, updates its semaphore value.
auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); };
CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback);
CachedImageRecord* cachedImageRecord = imagesCache->insert(manager.georeferencedImageParams.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback);

// TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE
// TODO: Function call that gets you image creation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENCED TYPE
IGPUImage::SCreationParams imageCreationParams = {};
ImageType georeferenceImageType;
determineGeoreferencedImageCreationParams(imageCreationParams, georeferenceImageType, params);
determineGeoreferencedImageCreationParams(imageCreationParams, manager);

// imageParams = cpuImage->getCreationParameters();
imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT;
Expand All @@ -671,11 +670,11 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(
const auto cachedImageType = cachedImageRecord->type;
// image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-push
const auto currentParams = static_cast<asset::IImage::SCreationParams>(imageCreationParams);
const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams;
const bool needsRecreation = cachedImageType != manager.imageType || cachedParams != currentParams;
if (needsRecreation)
{
// call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena.
evictCallback(imageID, *cachedImageRecord);
evictCallback(manager.georeferencedImageParams.imageID, *cachedImageRecord);

// instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image
*cachedImageRecord = CachedImageRecord(currentFrameIndex);
Expand Down Expand Up @@ -705,17 +704,17 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(
if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address)
{
// Attempt to create a GPU image and image view for this texture.
ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID));
ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(manager.georeferencedImageParams.imageID));

if (allocResults.isValid())
{
cachedImageRecord->type = georeferenceImageType;
cachedImageRecord->type = manager.imageType;
cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND;
cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN
cachedImageRecord->allocationOffset = allocResults.allocationOffset;
cachedImageRecord->allocationSize = allocResults.allocationSize;
cachedImageRecord->gpuImageView = allocResults.gpuImageView;
cachedImageRecord->staticCPUImage = nullptr;
cachedImageRecord->staticCPUImage = manager.georeferencedImageParams.geoReferencedImage;
}
else
{
Expand Down Expand Up @@ -743,7 +742,7 @@ bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(
}

// erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation
imagesCache->erase(imageID);
imagesCache->erase(manager.georeferencedImageParams.imageID);
}
}
else
Expand Down Expand Up @@ -867,7 +866,7 @@ void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundin
endMainObject();
}

void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit)
void DrawResourcesFiller::addGeoreferencedImage(StreamedImageManager& manager, const float64_t3x3& NDCToWorld, SIntendedSubmitInfo& intendedNextSubmit)
{
beginMainObject(MainObjectType::STREAMED_IMAGE);

Expand All @@ -879,11 +878,21 @@ void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const Georefer
return;
}

// Generate upload data
auto uploadData = manager.generateTileUploadData(NDCToWorld);

// Queue image uploads - if necessary
if (manager.imageType == ImageType::GEOREFERENCED_STREAMED)
{
for (const auto& imageCopy : uploadData.tiles)
queueGeoreferencedImageCopy_Internal(manager.georeferencedImageParams.imageID, imageCopy);
}

GeoreferencedImageInfo info = {};
info.topLeft = params.worldspaceOBB.topLeft;
info.dirU = params.worldspaceOBB.dirU;
info.aspectRatio = params.worldspaceOBB.aspectRatio;
info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory
info.topLeft = uploadData.worldspaceOBB.topLeft;
info.dirU = uploadData.worldspaceOBB.dirU;
info.aspectRatio = uploadData.worldspaceOBB.aspectRatio;
info.textureID = getImageIndexFromID(manager.georeferencedImageParams.imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory
if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx))
{
// single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects
Expand Down Expand Up @@ -1370,7 +1379,7 @@ bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedN
std::vector<CachedImageRecord*> nonResidentImageRecords;
for (auto& [id, record] : imagesCache)
{
if (record.staticCPUImage && record.type == ImageType::STATIC && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA)
if (record.staticCPUImage && (record.type == ImageType::STATIC || record.type == ImageType::GEOREFERENCED_FULL_RESOLUTION) && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA)
nonResidentImageRecords.push_back(&record);
}

Expand Down Expand Up @@ -1557,7 +1566,7 @@ bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intende
std::vector<IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t> afterCopyImageBarriers;
afterCopyImageBarriers.reserve(streamedImageCopies.size());

// Pipeline Barriers before imageCopy
// Pipeline Barriers after imageCopy
for (auto& [imageID, imageCopies] : streamedImageCopies)
{
auto* imageRecord = imagesCache->peek(imageID);
Expand Down Expand Up @@ -2461,30 +2470,43 @@ DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAlloc
return ret;
}

void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams)
void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, StreamedImageManager& manager)
{
auto& georeferencedImageParams = manager.georeferencedImageParams;
// Decide whether the image can reside fully into memory rather than get streamed.
// TODO: Improve logic, currently just a simple check to see whether the full-resolution image has more pixels than the viewport or not
// TODO: add a criterion that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents)
const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y;

if (betterToResideFullyInMem)
outImageType = ImageType::GEOREFERENCED_FULL_RESOLUTION;
manager.imageType = ImageType::GEOREFERENCED_FULL_RESOLUTION;
else
outImageType = ImageType::GEOREFERENCED_STREAMED;
manager.imageType = ImageType::GEOREFERENCED_STREAMED;

outImageParams.type = asset::IImage::ET_2D;
outImageParams.samples = asset::IImage::ESCF_1_BIT;
outImageParams.format = georeferencedImageParams.format;

if (outImageType == ImageType::GEOREFERENCED_FULL_RESOLUTION)
if (manager.imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION)
{
outImageParams.extent = { georeferencedImageParams.imageExtents.x, georeferencedImageParams.imageExtents.y, 1u };
}
else
{
// TODO: Better Logic, area around the view, etc...
outImageParams.extent = { georeferencedImageParams.viewportExtents.x, georeferencedImageParams.viewportExtents.y, 1u };
// Pad sides to a multiple of tileSize. Even after rounding up, we might still need to add an extra tile to cover both sides.
// I added two to be safe and to avoid issues at the borders.
const auto xExtent = core::roundUp(georeferencedImageParams.viewportExtents.x, manager.TileSize) + 2 * manager.TileSize;
const auto yExtent = core::roundUp(georeferencedImageParams.viewportExtents.y, manager.TileSize) + 2 * manager.TileSize;
outImageParams.extent = { xExtent, yExtent, 1u };
manager.maxResidentTiles.x = xExtent / manager.TileSize;
manager.maxResidentTiles.y = yExtent / manager.TileSize;
// Create a "sliding window OBB" that we use to offset tiles
manager.fromTopLeftOBB.topLeft = georeferencedImageParams.worldspaceOBB.topLeft;
manager.fromTopLeftOBB.dirU = georeferencedImageParams.worldspaceOBB.dirU * float32_t(manager.TileSize * manager.maxResidentTiles.x) / float32_t(georeferencedImageParams.imageExtents.x);
manager.fromTopLeftOBB.aspectRatio = float32_t(manager.maxResidentTiles.y) / float32_t(manager.maxResidentTiles.x);
// I think aspect ratio can stay the same since worldspace OBB and imageExtents should have same aspect ratio.
// If the image can be stretched/sheared and not simply rotated, then the aspect ratio *might* have to change, although I think that's covered by
// the OBB's aspect ratio
}


Expand Down Expand Up @@ -2624,4 +2646,126 @@ void DrawResourcesFiller::flushDrawObjects()
drawCalls.push_back(drawCall);
drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount();
}
}

// Takes ownership of the georeferenced image parameters and precomputes:
// - `maxImageTileIndices`: the largest valid tile index along each axis of the image
// - `offsetCoBScaleMatrix`: a 2x3 matrix mapping a homogeneous worldspace point to its (fractional) coordinates in the image's tile lattice
DrawResourcesFiller::StreamedImageManager::StreamedImageManager(GeoreferencedImageParams&& _georeferencedImageParams)
	: georeferencedImageParams(std::move(_georeferencedImageParams))
{
	const auto& imageExtents = georeferencedImageParams.imageExtents;
	const auto& imageOBB = georeferencedImageParams.worldspaceOBB;

	// Largest tile index per axis: integer-divide the extents by the tile size, then step back by one
	// along every axis the tiles cover exactly (an exact fit needs one tile less under this scheme).
	const uint32_t2 tileExtents(TileSize, TileSize);
	maxImageTileIndices = imageExtents / tileExtents;
	const uint32_t2 coveredExactly(maxImageTileIndices.x * TileSize == imageExtents.x, maxImageTileIndices.y * TileSize == imageExtents.y);
	maxImageTileIndices -= coveredExactly;

	// The plane can be covered by a lattice of image-sized tiles; tiles actually backed by the image have indices in
	// [0, maxImageTileIndices.x] x [0, maxImageTileIndices.y]. Finding the tiles needed for a draw requires locating the viewport
	// corners in that lattice, so we build a single matrix taking a worldspace point to lattice coordinates in three steps:
	//   (a) offset:          subtract the image's `topLeft` corner from the point (result in world units)
	//   (b) change of basis: express that offset in the basis {dirU, dirV} (result is a uv coordinate)
	//   (c) scale:           convert uv to tile units (non-uniform in general, it depends on pixels per world unit on each axis)
	// The member's name, `offsetCoBScaleMatrix`, spells out those steps in order.

	// (a) Offset. Acts on homogeneous worldspace points; only the vector part of the result is kept, hence 2x3 instead of 3x3.
	const float64_t2 imageOrigin = imageOBB.topLeft;
	const float64_t2x3 worldToOffset(1., 0., - imageOrigin.x, 0., 1., - imageOrigin.y);

	// (b) Change of basis. {dirU, dirV} are orthogonal, so each row is just the basis vector divided by its squared length.
	// Non-uniform scaling leaves this valid, but shearing would not (the basis would stop being orthogonal).
	const float64_t2 axisU = imageOBB.dirU;
	const float64_t2 axisV = float32_t2(axisU.y, -axisU.x) * imageOBB.aspectRatio;
	const float64_t axisUSquaredLength = nbl::hlsl::dot(axisU, axisU);
	const float64_t axisVSquaredLength = nbl::hlsl::dot(axisV, axisV);
	const float64_t2 rowU = axisU / axisUSquaredLength;
	const float64_t2 rowV = axisV / axisVSquaredLength;
	const float64_t2x2 offsetToUV(rowU, rowV);

	// (c) Scale. After (b) points inside the image have uv in [0,1]^2 (points outside fall outside that range);
	// multiplying by the fractional number of tiles that fit along each axis yields tile lattice coordinates.
	const float32_t2 tilesPerAxis = float32_t2(imageExtents) / float32_t2(TileSize, TileSize);
	const float64_t2x2 uvToTileLattice(tilesPerAxis.x, 0., 0., tilesPerAxis.y);

	// Compose: scale * changeOfBasis * offset
	offsetCoBScaleMatrix = nbl::hlsl::mul(uvToTileLattice, nbl::hlsl::mul(offsetToUV, worldToOffset));
}

// Works out which tiles of the image the current view needs and builds the data required to upload and draw them.
//
// `NDCToWorld` maps homogeneous NDC coordinates of the viewport to homogeneous worldspace coordinates.
// NOTE(review): the transformed points are used without a perspective divide, so this presumably expects an affine transform - confirm against callers.
//
// Returns a `TileUploadData` made of:
// - one `StreamedImageCopy` per tile in [minLoadedTileIndices, maxLoadedTileIndices] (no copies at all for `GEOREFERENCED_FULL_RESOLUTION` images)
// - the worldspace OBB to draw the image with (the image's own OBB for `GEOREFERENCED_FULL_RESOLUTION` images)
//
// Side effect: overwrites `minLoadedTileIndices` and `maxLoadedTileIndices` when the image is streamed.
DrawResourcesFiller::StreamedImageManager::TileUploadData DrawResourcesFiller::StreamedImageManager::generateTileUploadData(const float64_t3x3& NDCToWorld)
{
	if (imageType == ImageType::GEOREFERENCED_FULL_RESOLUTION)
		return TileUploadData{ {}, georeferencedImageParams.worldspaceOBB };

	// Following need only be done if image is actually streamed

	// Using Vulkan NDC, the viewport spans [-1,1] x [-1,1]. First we get the world coordinates of the viewport corners, in homogenous coordinates
	const float64_t3 topLeftNDCH(-1., -1., 1.);
	const float64_t3 topRightNDCH(1., -1., 1.);
	const float64_t3 bottomLeftNDCH(-1., 1., 1.);
	const float64_t3 bottomRightNDCH(1., 1., 1.);

	const float64_t3 topLeftWorldH = nbl::hlsl::mul(NDCToWorld, topLeftNDCH);
	const float64_t3 topRightWorldH = nbl::hlsl::mul(NDCToWorld, topRightNDCH);
	const float64_t3 bottomLeftWorldH = nbl::hlsl::mul(NDCToWorld, bottomLeftNDCH);
	const float64_t3 bottomRightWorldH = nbl::hlsl::mul(NDCToWorld, bottomRightNDCH);

	// We can use `offsetCoBScaleMatrix` to get tile lattice coordinates for each of these points
	const float64_t2 topLeftTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, topLeftWorldH);
	const float64_t2 topRightTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, topRightWorldH);
	const float64_t2 bottomLeftTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, bottomLeftWorldH);
	const float64_t2 bottomRightTileLattice = nbl::hlsl::mul(offsetCoBScaleMatrix, bottomRightWorldH);

	// Get the min and max of each lattice coordinate
	const float64_t2 minTop = nbl::hlsl::min(topLeftTileLattice, topRightTileLattice);
	const float64_t2 minBottom = nbl::hlsl::min(bottomLeftTileLattice, bottomRightTileLattice);
	const float64_t2 minAll = nbl::hlsl::min(minTop, minBottom);

	const float64_t2 maxTop = nbl::hlsl::max(topLeftTileLattice, topRightTileLattice);
	const float64_t2 maxBottom = nbl::hlsl::max(bottomLeftTileLattice, bottomRightTileLattice);
	const float64_t2 maxAll = nbl::hlsl::max(maxTop, maxBottom);

	// Floor them to get an integer for the tiles they're in
	const int32_t2 minAllFloored = nbl::hlsl::floor(minAll);
	const int32_t2 maxAllFloored = nbl::hlsl::floor(maxAll);

	// Clamp them to tile indices that exist in the image; the max is additionally limited so that
	// at most `maxResidentTiles` tiles per axis are loaded, counting from the min tile.
	minLoadedTileIndices = nbl::hlsl::clamp(minAllFloored, int32_t2(0, 0), int32_t2(maxImageTileIndices));
	maxLoadedTileIndices = nbl::hlsl::clamp(maxAllFloored, int32_t2(0, 0), nbl::hlsl::min(int32_t2(maxImageTileIndices), int32_t2(minLoadedTileIndices + maxResidentTiles - uint32_t2(1,1))));

	// Now we have the indices of the tiles we want to upload, so create the vector of `StreamedImageCopies` - 1 per tile.
	core::vector<StreamedImageCopy> tiles;
	tiles.reserve((maxLoadedTileIndices.x - minLoadedTileIndices.x + 1) * (maxLoadedTileIndices.y - minLoadedTileIndices.y + 1));

	// Assuming a 1 pixel per block format - otherwise math here gets a bit trickier
	const uint64_t bytesPerPixel = getTexelOrBlockBytesize(georeferencedImageParams.format);
	const auto& imageExtents = georeferencedImageParams.imageExtents;
	// Local copy so the comparisons below work on a plain value of a known type
	const uint32_t tileSize = TileSize;

	for (uint32_t tileX = minLoadedTileIndices.x; tileX <= maxLoadedTileIndices.x; tileX++)
	{
		// First source texel column of this tile, and how many columns the image still has from there.
		// Tiles on the last column of the lattice may be narrower than `tileSize` when the image width is not a multiple of it.
		const uint32_t srcTexelX = tileX * tileSize;
		const uint32_t remainingWidth = imageExtents.x - srcTexelX;
		const uint32_t copyWidth = remainingWidth < tileSize ? remainingWidth : tileSize;

		for (uint32_t tileY = minLoadedTileIndices.y; tileY <= maxLoadedTileIndices.y; tileY++)
		{
			// Same as above, for rows
			const uint32_t srcTexelY = tileY * tileSize;
			const uint32_t remainingHeight = imageExtents.y - srcTexelY;
			const uint32_t copyHeight = remainingHeight < tileSize ? remainingHeight : tileSize;

			asset::IImage::SBufferCopy bufCopy;
			// Byte offset of the tile's first texel in the source buffer. The row pitch has to be the real image width
			// (the same value passed as `bufferRowLength`), not the width rounded up to whole tiles, otherwise every row
			// of tiles past the first is fetched from the wrong place for images whose width is not a multiple of the tile size.
			// Done in 64 bits so very large images cannot overflow the intermediate products.
			bufCopy.bufferOffset = (static_cast<uint64_t>(srcTexelY) * imageExtents.x + srcTexelX) * bytesPerPixel;
			bufCopy.bufferRowLength = imageExtents.x;
			bufCopy.bufferImageHeight = 0;
			bufCopy.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT;
			bufCopy.imageSubresource.mipLevel = 0u;
			bufCopy.imageSubresource.baseArrayLayer = 0u;
			bufCopy.imageSubresource.layerCount = 1u;
			// Destination is relative to the first loaded tile, which always lands at the GPU image's origin
			bufCopy.imageOffset = { (tileX - minLoadedTileIndices.x) * TileSize, (tileY - minLoadedTileIndices.y) * TileSize, 0u };
			// Clamped to the image bounds so edge tiles never read past the end of a source row / the source buffer
			bufCopy.imageExtent.width = copyWidth;
			bufCopy.imageExtent.height = copyHeight;
			bufCopy.imageExtent.depth = 1;

			tiles.emplace_back(georeferencedImageParams.format, georeferencedImageParams.geoReferencedImage->getBuffer(), std::move(bufCopy));
		}
	}

	// Last, we need to figure out an obb that covers only the currently loaded tiles
	// By shifting the `fromTopLeftOBB` an appropriate number of tiles in each direction, we get an obb that covers at least the uploaded tiles
	// It might cover more tiles, possibly some that are not even loaded into VRAM, but since those fall outside of the viewport we don't really care about them
	OrientedBoundingBox2D worldspaceOBB = fromTopLeftOBB;
	const float32_t2 dirV = float32_t2(worldspaceOBB.dirU.y, -worldspaceOBB.dirU.x) * worldspaceOBB.aspectRatio;
	worldspaceOBB.topLeft += worldspaceOBB.dirU * float32_t(minLoadedTileIndices.x) / float32_t(maxResidentTiles.x);
	worldspaceOBB.topLeft += dirV * float32_t(minLoadedTileIndices.y) / float32_t(maxResidentTiles.y);
	return TileUploadData{ std::move(tiles), worldspaceOBB };
}
Loading