Skip to content
Draft
33 changes: 20 additions & 13 deletions 08_HelloSwapchain/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ class WindowedApplication : public virtual BasicMultiQueueApplication
public:
using base_t::base_t;

// We inherit from an application that tries to find Graphics and Compute queues
// because applications with presentable images often want to perform Graphics family operations
virtual bool isComputeOnly() const {return false;}

virtual video::IAPIConnection::SFeatures getAPIFeaturesToEnable() override
{
auto retval = base_t::getAPIFeaturesToEnable();
Expand All @@ -26,22 +30,23 @@ class WindowedApplication : public virtual BasicMultiQueueApplication
}

// New function, we need to know about surfaces to create ahead of time
virtual core::vector<const video::SPhysicalDeviceFilter::SurfaceCompatibility> getSurfaces() const = 0;
virtual core::vector<video::SPhysicalDeviceFilter::SurfaceCompatibility> getSurfaces() const = 0;

virtual core::set<video::IPhysicalDevice*> filterDevices(const core::SRange<video::IPhysicalDevice* const>& physicalDevices) const
// We have a very simple heuristic, the device must be able to render to all windows!
// (want to make something more complex? you're on your own!)
virtual void filterDevices(core::set<video::IPhysicalDevice*>& physicalDevices) const
{
const auto firstFilter = base_t::filterDevices(physicalDevices);
base_t::filterDevices(physicalDevices);

video::SPhysicalDeviceFilter deviceFilter = {};

const auto surfaces = getSurfaces();
deviceFilter.requiredSurfaceCompatibilities = surfaces.data();
deviceFilter.requiredSurfaceCompatibilitiesCount = surfaces.size();
auto surfaces = getSurfaces();
deviceFilter.requiredSurfaceCompatibilities = {surfaces};

return deviceFilter(physicalDevices);
}

virtual bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
virtual bool onAppInitialized(core::smart_refctd_ptr<system::ISystem>&& system) override
{
// Remember to call the base class initialization!
if (!base_t::onAppInitialized(std::move(system)))
Expand All @@ -52,6 +57,7 @@ class WindowedApplication : public virtual BasicMultiQueueApplication
#else
#error "Unimplemented!"
#endif
return true;
}

core::smart_refctd_ptr<ui::IWindowManager> m_winMgr;
Expand Down Expand Up @@ -87,7 +93,7 @@ class SingleNonResizableWindowApplication : public virtual WindowedApplication
public:
using base_t::base_t;

virtual bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
virtual bool onAppInitialized(core::smart_refctd_ptr<system::ISystem>&& system) override
{
// Remember to call the base class initialization!
if (!base_t::onAppInitialized(std::move(system)))
Expand All @@ -98,7 +104,7 @@ class SingleNonResizableWindowApplication : public virtual WindowedApplication
return true;
}

virtual core::vector<const video::SPhysicalDeviceFilter::SurfaceCompatibility> getSurfaces() const
virtual core::vector<video::SPhysicalDeviceFilter::SurfaceCompatibility> getSurfaces() const
{
return {{m_surface.get()/*,EQF_NONE*/}};
}
Expand All @@ -112,15 +118,15 @@ class SingleNonResizableWindowApplication : public virtual WindowedApplication
}

protected:
virtual IWindow::SCreationParams getWindowCreationParams() const
virtual ui::IWindow::SCreationParams getWindowCreationParams() const
{
IWindow::SCreationParams params = {};
params.callback = make_smart_refctd_ptr<IWindowClosedCallback>();
ui::IWindow::SCreationParams params = {};
params.callback = core::make_smart_refctd_ptr<IWindowClosedCallback>();
params.width = 640;
params.height = 480;
params.x = 32;
params.y = 32;
params.flags = IWindow::ECF_NONE;
params.flags = ui::IWindow::ECF_NONE;
params.windowCaption = "SingleNonResizableWindowApplication";
return params;
}
Expand All @@ -130,6 +136,7 @@ class SingleNonResizableWindowApplication : public virtual WindowedApplication
};
}

#include "nbl/video/CVulkanSwapchain.h"

using namespace nbl;
using namespace core;
Expand Down
24 changes: 24 additions & 0 deletions 67_SubAllocatedDescriptorSet/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build script for this example.
# Pull in the shared `common` module; RES is left empty/false when it cannot be found.
include(common RESULT_VARIABLE RES)
if(NOT RES)
message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Create the executable target through the project macro, passing only the PCH target.
# NOTE(review): EXECUTABLE_NAME is used below but never set in this file - presumably this macro defines it; confirm.
nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")

# Optional: embed everything under app_resources/ into the executable as builtin resources.
if(NBL_EMBED_BUILTIN_RESOURCES)
set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
set(RESOURCE_DIR "app_resources")

# Absolute paths: where to search for the resources and where the generated sources/headers go.
get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)

# Collect every file under the resource directory (paths relative to it) and register each one.
file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
endforeach()

# Create the builtin-resource target from the registered files, under the `nbl::this_example::builtin` namespace.
ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")

# Make the embedded resources available to the example executable.
LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
Comment on lines +1 to +24

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd make this example 1x or 2x, using 2x for basic utility/extension tests now

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

number 27 is up for grabs

28 changes: 28 additions & 0 deletions 67_SubAllocatedDescriptorSet/config.json.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"enableParallelBuild": true,
"threadsPerBuildProcess" : 2,
"isExecuted": false,
"scriptPath": "",
"cmake": {
"configurations": [ "Release", "Debug", "RelWithDebInfo" ],
"buildModes": [],
"requiredOptions": []
},
"profiles": [
{
"backend": "vulkan", // should be none
"platform": "windows",
"buildModes": [],
"runConfiguration": "Release", // we also need to run in Debug and RWDI because foundational example
"gpuArchitectures": []
}
],
"dependencies": [],
"data": [
{
"dependencies": [],
"command": [""],
"outputs": []
}
]
}
190 changes: 190 additions & 0 deletions 67_SubAllocatedDescriptorSet/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h


#include "nbl/video/surface/CSurfaceVulkan.h"
#include "nbl/video/alloc/SubAllocatedDescriptorSet.h"

#include "../common/BasicMultiQueueApplication.hpp"
#include "../common/MonoAssetManagerAndBuiltinResourceApplication.hpp"

using namespace nbl;
using namespace core;
using namespace system;
using namespace ui;
using namespace asset;
using namespace video;

#include "nbl/builtin/hlsl/bit.hlsl"

// In this application we exercise the SubAllocatedDescriptorSet: sub-allocating and freeing ranges of descriptors within a single binding
class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplication, public examples::MonoAssetManagerAndBuiltinResourceApplication
{
// Shorthands for the two base classes, so their `onAppInitialized` can be chained explicitly
using device_base_t = examples::MonoDeviceApplication;
using asset_base_t = examples::MonoAssetManagerAndBuiltinResourceApplication;

// The pool cache is just a formalized way of round-robining command pools and resetting + reusing them after their most recent submit signals finished.
// It's a little more ergonomic to use if you don't have a 1:1 mapping between frames and pools.
// Created in `onAppInitialized`, pools are acquired from it in `workLoopBody`.
smart_refctd_ptr<nbl::video::ICommandPoolCache> m_poolCache;

// NOTE(review): never assigned in the code visible here, `onAppInitialized` works on a local instance instead - confirm whether this member is still needed
smart_refctd_ptr<nbl::video::SubAllocatedDescriptorSet> m_subAllocDescriptorSet;

// Single semaphore created with the initial value of `m_iteration`, every submit signals it with the incremented iteration number
smart_refctd_ptr<ISemaphore> m_timeline;
// Number of work loop iterations started so far
uint64_t m_iteration = 0;
// `keepRunning` returns false once `m_iteration` reaches this
constexpr static inline uint64_t MaxIterations = 200;

// Both constants are only forwarded to the SubAllocatedDescriptorSet constructor in `onAppInitialized`
constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 64u*1024u; // if you need larger alignments then you're not right in the head
constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u;

public:
// Multiple inheritance prevents us from simply forwarding the base constructors,
// so `system::IApplicationFramework` gets initialized explicitly with all four working directories.
SubAllocatedDescriptorSetApp(
	const path& _localInputCWD,
	const path& _localOutputCWD,
	const path& _sharedInputCWD,
	const path& _sharedOutputCWD
) : system::IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD)
{
}

// We stuff all the descriptor sub-allocation testing here because it's a "single shot" test:
// allocate 512 ranges of 4 descriptors, free every other one, then allocate 512 ranges of 2 descriptors.
// Returns false when either base class fails to initialize, true otherwise.
bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
{
	using nbl::video::IGPUDescriptorSetLayout;

	// Remember to call the base class initialization!
	// Both bases get handed the system, so the first one receives a copy; moving twice would give the second one a moved-from pointer.
	if (!device_base_t::onAppInitialized(smart_refctd_ptr(system)))
		return false;
	if (!asset_base_t::onAppInitialized(std::move(system)))
		return false;

	// Number of command pools the cache gets created with
	constexpr auto MaxConcurrency = 64;

	// Since we don't throw the Command Pools away and we'll reset them instead, we don't create the pools with the transient flag
	m_poolCache = ICommandPoolCache::create(core::smart_refctd_ptr(m_device),getComputeQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::NONE,MaxConcurrency);

	// In contrast to fences, we just need one semaphore to rule all dispatches
	m_timeline = m_device->createSemaphore(m_iteration);

	// Descriptor set sub allocator, fed with a single binding of 65536 storage image descriptors
	// NOTE(review): try to use const and designated initializers here if you can
	video::IGPUDescriptorSetLayout::SBinding bindings[1];
	{
		bindings[0].binding = 0;
		bindings[0].count = 65536u;
		bindings[0].createFlags = core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT)
			| IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT
			| IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT;
		bindings[0].type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE;
		bindings[0].stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE;
	}

	// TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1)
	auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr<nbl::video::SubAllocatedDescriptorSet>(
		bindings, MaxDescriptorSetAllocationAlignment, MinDescriptorSetAllocationSize
	);

	constexpr uint32_t AllocationCount = 512u;
	const auto invalidAddress = core::GeneralpurposeAddressAllocator<uint32_t>::invalid_address;

	// Requests `AllocationCount` ranges of `rangeSize` descriptors each from binding 0,
	// then logs every returned offset and asserts that none of the requests failed.
	auto allocateRanges = [&](std::vector<uint32_t>& offsets, const uint32_t rangeSize) -> void
	{
		// every slot starts out invalid, that's how the allocator knows it still needs filling
		offsets.assign(AllocationCount,invalidAddress);
		std::vector<uint32_t> sizes(AllocationCount,rangeSize);
		subAllocatedDescriptorSet->multi_allocate(0, offsets.size(), offsets.data(), sizes.data());
		for (uint32_t i = 0; i < offsets.size(); i++)
		{
			// both arguments are unsigned, hence `%u`
			m_logger->log("allocation[%u]: %u", system::ILogger::ELL_INFO, i, offsets[i]);
			assert(offsets[i] != invalidAddress);
		}
	};

	std::vector<uint32_t> allocation;
	allocateRanges(allocation,4u);

	// Free every other range to punch holes into the allocated address space
	{
		std::vector<uint32_t> addr, freeSize;
		for (uint32_t i = 0; i < AllocationCount; i+=2)
		{
			addr.push_back(allocation[i]);
			freeSize.push_back(4);
		}
		subAllocatedDescriptorSet->multi_deallocate(0, addr.size(), addr.data(), freeSize.data());
	}
	m_logger->log("Freed some allocations", system::ILogger::ELL_INFO);

	// Second round, smaller ranges this time
	allocateRanges(allocation,2u);

	return true;
}

// The work loop keeps going until `MaxIterations` iterations have been started
// (a real loop instead of one blocking call, maybe just for the sake of future WASM so we don't timeout a Browser Tab with an unresponsive script)
bool keepRunning() override
{
	const bool iterationsLeft = m_iteration<MaxIterations;
	return iterationsLeft;
}

// Finally the first actual work-loop
void workLoopBody() override
{
IQueue* const queue = getComputeQueue();

// Obtain our command pool once one gets recycled
uint32_t poolIx;
do
{
poolIx = m_poolCache->acquirePool();
} while (poolIx==ICommandPoolCache::invalid_index);

smart_refctd_ptr<IGPUCommandBuffer> cmdbuf;
{
m_poolCache->getPool(poolIx)->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1},core::smart_refctd_ptr(m_logger));
// lets record, its still a one time submit because we have to re-record with different push constants each time
cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);

// COMMAND RECORDING

auto result = cmdbuf->end();
assert(result);
}


const auto savedIterNum = m_iteration++;
{
const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo =
{
.cmdbuf = cmdbuf.get()
};
const IQueue::SSubmitInfo::SSemaphoreInfo signalInfo =
{
.semaphore = m_timeline.get(),
.value = m_iteration,
.stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT
};
// Generally speaking we don't need to wait on any semaphore because in this example every dispatch gets its own clean piece of memory to use
// from the point of view of the GPU. Implicit domain operations between Host and Device happen upon a submit and a semaphore/fence signal operation,
// this ensures we can touch the input and get accurate values from the output memory using the CPU before and after respectively, each submit becoming PENDING.
// If we actually cared about this submit seeing the memory accesses of a previous dispatch we could add a semaphore wait
const IQueue::SSubmitInfo submitInfo = {
.waitSemaphores = {},
.commandBuffers = {&cmdbufInfo,1},
.signalSemaphores = {&signalInfo,1}
};

queue->startCapture();
auto statusCode = queue->submit({ &submitInfo,1 });
queue->endCapture();
assert(statusCode == IQueue::RESULT::SUCCESS);
}
}
};

NBL_MAIN_FUNC(SubAllocatedDescriptorSetApp)
50 changes: 50 additions & 0 deletions 67_SubAllocatedDescriptorSet/pipeline.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import org.DevshGraphicsProgramming.Agent
import org.DevshGraphicsProgramming.BuilderInfo
import org.DevshGraphicsProgramming.IBuilder

/**
 * CI builder for this example. Only the build stage does any work, it runs
 * `cmake --build` on the example's target; prepare, test and install succeed unconditionally.
 *
 * NOTE(review): the class name looks carried over from the StreamingAndBufferDeviceAddress example,
 * while this script lives in 67_SubAllocatedDescriptorSet - confirm whether a rename is wanted.
 */
class CStreamingAndBufferDeviceAddressBuilder extends IBuilder
{
    CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info)
    {
        super(_agent, _info)
    }

    /** Nothing to prepare. */
    @Override
    boolean prepare(Map axisMapping)
    {
        true
    }

    /**
     * Builds the example's target for the CONFIGURATION and BUILD_TYPE picked by the axis mapping.
     * Always reports success once the command has been handed to the agent.
     */
    @Override
    boolean build(Map axisMapping)
    {
        IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION")
        IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE")

        def buildDirName = getNameOfBuildDirectory(buildType)
        def configName = getNameOfConfig(config)

        def buildCommand = "cmake --build ${info.rootProjectPath}/${buildDirName}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${configName} -j12 -v"
        agent.execute(buildCommand)

        true
    }

    /** No tests to run. */
    @Override
    boolean test(Map axisMapping)
    {
        true
    }

    /** Nothing to install. */
    @Override
    boolean install(Map axisMapping)
    {
        true
    }
}

// Factory used to obtain the builder of this example for the given agent and build info
def create(Agent _agent, _info)
{
    def builder = new CStreamingAndBufferDeviceAddressBuilder(_agent, _info)
    return builder
}

return this
Loading