Precompile and cache EX31 path tracer variants#262
Precompile and cache EX31 path tracer variants #262 — AnastaZIuk wants to merge 35 commits into master from
Conversation
31_HLSLPathTracer/include/nbl/builtin/hlsl/bxdf/ndf/microfacet_to_light_transform.hlsl
Outdated
Show resolved
Hide resolved
31_HLSLPathTracer/main.cpp
Outdated
| const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { | ||
| "ELG_SPHERE", | ||
| "ELG_TRIANGLE", | ||
| "ELG_RECTANGLE" | ||
| }; | ||
| const char* polygonMethodNames[EPM_COUNT] = { | ||
| "Area", | ||
| "Solid Angle", | ||
| "Projected Solid Angle" | ||
| }; |
There was a problem hiding this comment.
system::to_string_helper specializations are now our preferred way
There was a problem hiding this comment.
resolved + moved to a separate header here, but I think I might slap it into Nabla
There was a problem hiding this comment.
not Nabla, it's a thing only for this example
31_HLSLPathTracer/main.cpp
Outdated
| // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` | ||
| auto createBufferFromCacheFile = [this]( | ||
| system::path filename, | ||
| size_t bufferSize, | ||
| void *data, | ||
| const system::path& filePath, | ||
| size_t byteSize, | ||
| void* data, | ||
| smart_refctd_ptr<ICPUBuffer>& buffer | ||
| ) -> std::pair<smart_refctd_ptr<IFile>, bool> | ||
| ) -> bool | ||
| { | ||
| ISystem::future_t<smart_refctd_ptr<nbl::system::IFile>> owenSamplerFileFuture; | ||
| ISystem::future_t<size_t> owenSamplerFileReadFuture; | ||
| size_t owenSamplerFileBytesRead; | ||
| size_t owenSamplerFileBytesRead = 0ull; | ||
|
|
||
| m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); | ||
| m_system->createFile(owenSamplerFileFuture, filePath, IFile::ECF_READ); | ||
| smart_refctd_ptr<IFile> owenSamplerFile; | ||
|
|
||
| if (owenSamplerFileFuture.wait()) | ||
| { | ||
| owenSamplerFileFuture.acquire().move_into(owenSamplerFile); | ||
| if (!owenSamplerFile) | ||
| return { nullptr, false }; | ||
| return false; | ||
|
|
||
| owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); | ||
| owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, byteSize); | ||
| if (owenSamplerFileReadFuture.wait()) | ||
| { | ||
| owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); | ||
|
|
||
| if (owenSamplerFileBytesRead < bufferSize) | ||
| { | ||
| buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); | ||
| return { owenSamplerFile, false }; | ||
| } | ||
| if (owenSamplerFileBytesRead < byteSize) | ||
| return false; | ||
|
|
||
| buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); | ||
| buffer = asset::ICPUBuffer::create({ { byteSize }, data }); | ||
| return true; | ||
| } | ||
| } | ||
|
|
||
| return { owenSamplerFile, true }; | ||
| return false; | ||
| }; | ||
| auto writeBufferIntoCacheFile = [this](smart_refctd_ptr<IFile> file, size_t bufferSize, void* data) | ||
| auto writeBufferIntoCacheFile = [this](const system::path& filePath, size_t byteSize, const void* data) | ||
| { | ||
| std::filesystem::create_directories(filePath.parent_path()); | ||
|
|
||
| ISystem::future_t<smart_refctd_ptr<nbl::system::IFile>> owenSamplerFileFuture; | ||
| ISystem::future_t<size_t> owenSamplerFileWriteFuture; | ||
| size_t owenSamplerFileBytesWritten; | ||
| size_t owenSamplerFileBytesWritten = 0ull; | ||
|
|
||
| file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); | ||
| m_system->createFile(owenSamplerFileFuture, filePath, IFile::ECF_WRITE); | ||
| if (!owenSamplerFileFuture.wait()) | ||
| return; | ||
|
|
||
| smart_refctd_ptr<IFile> file; | ||
| owenSamplerFileFuture.acquire().move_into(file); | ||
| if (!file) | ||
| return; | ||
|
|
||
| file->write(owenSamplerFileWriteFuture, const_cast<void*>(data), 0, byteSize); | ||
| if (owenSamplerFileWriteFuture.wait()) | ||
| owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); | ||
| }; |
There was a problem hiding this comment.
@keptsecret can you put it in a header in examples_common so that example 40 can use this cache and generate the Scrambled Owen Sequence as well, and we don't have a huge block of code like this sitting duplicated in both examples?
| template<typename T> | ||
| struct RuntimeShapeSamplingSelector<T, PST_SPHERE> | ||
| { | ||
| using scalar_type = T; | ||
| using vector3_type = vector<T, 3>; | ||
| using shape_type = Shape<T, PST_SPHERE>; | ||
|
|
||
| template<typename Ray> | ||
| static scalar_type deferredPdf(NEEPolygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) | ||
| { | ||
| const ShapeSampling<scalar_type, PST_SPHERE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_SPHERE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
|
|
||
| template<class Aniso> | ||
| static vector3_type generate_and_pdf(NEEPolygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) | ||
| { | ||
| const ShapeSampling<scalar_type, PST_SPHERE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_SPHERE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| }; | ||
|
|
||
| template<typename T> | ||
| struct RuntimeShapeSamplingSelector<T, PST_TRIANGLE> | ||
| { | ||
| using scalar_type = T; | ||
| using vector3_type = vector<T, 3>; | ||
| using shape_type = Shape<T, PST_TRIANGLE>; | ||
|
|
||
| template<typename Ray> | ||
| static scalar_type deferredPdf(const NEEPolygonMethod polygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) | ||
| { | ||
| switch (polygonMethod) | ||
| { | ||
| case PPM_AREA: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_AREA> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_AREA>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| case PPM_SOLID_ANGLE: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| case PPM_APPROX_PROJECTED_SOLID_ANGLE: | ||
| default: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_APPROX_PROJECTED_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_APPROX_PROJECTED_SOLID_ANGLE>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template<class Aniso> | ||
| static vector3_type generate_and_pdf(const NEEPolygonMethod polygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) | ||
| { | ||
| switch (polygonMethod) | ||
| { | ||
| case PPM_AREA: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_AREA> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_AREA>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| case PPM_SOLID_ANGLE: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| case PPM_APPROX_PROJECTED_SOLID_ANGLE: | ||
| default: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_TRIANGLE, PPM_APPROX_PROJECTED_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_TRIANGLE, PPM_APPROX_PROJECTED_SOLID_ANGLE>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| template<typename T> | ||
| struct RuntimeShapeSamplingSelector<T, PST_RECTANGLE> | ||
| { | ||
| using scalar_type = T; | ||
| using vector3_type = vector<T, 3>; | ||
| using shape_type = Shape<T, PST_RECTANGLE>; | ||
|
|
||
| template<typename Ray> | ||
| static scalar_type deferredPdf(const NEEPolygonMethod polygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) | ||
| { | ||
| switch (polygonMethod) | ||
| { | ||
| case PPM_AREA: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_RECTANGLE, PPM_AREA> sampling = ShapeSampling<scalar_type, PST_RECTANGLE, PPM_AREA>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| case PPM_SOLID_ANGLE: | ||
| case PPM_APPROX_PROJECTED_SOLID_ANGLE: | ||
| default: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_RECTANGLE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_RECTANGLE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template deferredPdf<Ray>(ray); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template<class Aniso> | ||
| static vector3_type generate_and_pdf(const NEEPolygonMethod polygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) | ||
| { | ||
| switch (polygonMethod) | ||
| { | ||
| case PPM_AREA: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_RECTANGLE, PPM_AREA> sampling = ShapeSampling<scalar_type, PST_RECTANGLE, PPM_AREA>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| case PPM_SOLID_ANGLE: | ||
| case PPM_APPROX_PROJECTED_SOLID_ANGLE: | ||
| default: | ||
| { | ||
| const ShapeSampling<scalar_type, PST_RECTANGLE, PPM_SOLID_ANGLE> sampling = ShapeSampling<scalar_type, PST_RECTANGLE, PPM_SOLID_ANGLE>::create(shape); | ||
| return sampling.template generate_and_pdf<Aniso>(pdf, newRayMaxT, origin, interaction, xi); | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
There was a problem hiding this comment.
Undo, you just went from 10ms to 80+
There was a problem hiding this comment.
resolved resolved resolved
| #ifndef PATH_TRACER_ENTRYPOINT_NAME | ||
| #define PATH_TRACER_ENTRYPOINT_NAME mainPersistent | ||
| #endif | ||
|
|
||
| #ifndef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD | ||
| #ifdef PATH_TRACER_RUNTIME_POLYGON_METHOD | ||
| #define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PATH_TRACER_RUNTIME_POLYGON_METHOD | ||
| #else | ||
| #define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE | ||
| #endif | ||
| #endif | ||
|
|
||
| #if !PATH_TRACER_ENABLE_PERSISTENT | ||
| #error Persistent entrypoint requested while PATH_TRACER_ENABLE_PERSISTENT is disabled | ||
| #endif |
There was a problem hiding this comment.
why not just make a Config struct with NBL_CONSTEXPR_STATIC_INLINE members and then we don't need to change definitions, so:
- Boost Wave preprocessing is reusable
- Clang-AST actually spots similar instantiations
- Unity Build is possible
| #if PATH_TRACER_USE_RWMC | ||
| accumulator_type accumulator = accumulator_type::create(::pc.splattingParameters); | ||
| #else | ||
| accumulator_type accumulator = accumulator_type::create(); | ||
| #endif | ||
|
|
||
| for (int i = 0; i < renderPushConstants.sampleCount; ++i) | ||
| { | ||
| const float32_t3 uvw = pathtracer.randGen(0u, i); | ||
| ray_type ray = rayGen.generate(uvw); | ||
| ray.initPayload(); | ||
| pathtracer.sampleMeasure(ray, i, renderPushConstants.depth, accumulator); | ||
| } | ||
|
|
||
| #if PATH_TRACER_USE_RWMC | ||
| for (uint32_t i = 0; i < CascadeCount; ++i) | ||
| ::cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); | ||
| #else | ||
| ::outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); | ||
| #endif | ||
| } | ||
|
|
||
| #if PATH_TRACER_ENABLE_LINEAR | ||
| void runLinear(uint32_t3 threadID, NEEPolygonMethod polygonMethod) | ||
| { | ||
| uint32_t width, height, imageArraySize; | ||
| ::outImage.GetDimensions(width, height, imageArraySize); | ||
| tracePixel(int32_t2(threadID.x % width, threadID.x / width), polygonMethod); | ||
| } | ||
| #endif | ||
|
|
||
| #if PATH_TRACER_ENABLE_PERSISTENT | ||
| void runPersistent(NEEPolygonMethod polygonMethod) | ||
| { | ||
| uint32_t width, height, imageArraySize; | ||
| ::outImage.GetDimensions(width, height, imageArraySize); | ||
| const uint32_t numWorkgroupsX = width / RenderWorkgroupSizeSqrt; | ||
| const uint32_t numWorkgroupsY = height / RenderWorkgroupSizeSqrt; | ||
|
|
||
| [loop] | ||
| for (uint32_t wgBase = glsl::gl_WorkGroupID().x; wgBase < numWorkgroupsX * numWorkgroupsY; wgBase += glsl::gl_NumWorkGroups().x) | ||
| { | ||
| const int32_t2 wgCoords = int32_t2(wgBase % numWorkgroupsX, wgBase / numWorkgroupsX); | ||
| morton::code<true, 32, 2> mc; | ||
| mc.value = glsl::gl_LocalInvocationIndex().x; | ||
| const int32_t2 localCoords = _static_cast<int32_t2>(mc); | ||
| tracePixel(wgCoords * int32_t2(RenderWorkgroupSizeSqrt, RenderWorkgroupSizeSqrt) + localCoords, polygonMethod); | ||
| } | ||
| } | ||
| #endif |
There was a problem hiding this comment.
a lot of this stuff would be nicer if you packaged it into a struct with constexpr members of templates and used NBL_IF_CONSTEXPR whenever possible
| video::SPhysicalDeviceLimits retval = device_base_t::getRequiredDeviceLimits(); | ||
| retval.storagePushConstant16 = true; | ||
| auto retval = device_base_t::getPreferredDeviceFeatures(); | ||
| retval.pipelineExecutableInfo = true; |
There was a problem hiding this comment.
also output the pipeline info like the example used to
| template<core::StringLiteral ShaderKey> | ||
| smart_refctd_ptr<IShader> loadPrecompiledShader() | ||
| { | ||
| IAssetLoader::SAssetLoadParams lp = {}; | ||
| lp.logger = m_logger.get(); | ||
| lp.workingDirectory = "app_resources"; | ||
|
|
||
| const auto key = nbl::this_example::builtin::build::get_spirv_key<ShaderKey>(m_device.get()); | ||
| auto assetBundle = m_assetMgr->getAsset(key, lp); | ||
| const auto assets = assetBundle.getContents(); | ||
| if (assets.empty()) | ||
| { | ||
| m_logger->log("Could not load precompiled shader: %s", ILogger::ELL_ERROR, key.c_str()); | ||
| return nullptr; | ||
| } | ||
|
|
||
| IGPUComputePipeline* pickPTPipeline() | ||
| auto shader = IAsset::castDown<IShader>(assets[0]); | ||
| if (!shader) | ||
| { | ||
| m_logger->log("Failed to cast %s asset to IShader!", ILogger::ELL_ERROR, key.c_str()); | ||
| return nullptr; | ||
| } | ||
|
|
||
| shader->setFilePathHint(std::string(std::string_view(ShaderKey.value))); | ||
| return shader; | ||
| } | ||
|
|
||
| void logStartupEvent(const char* const eventName) | ||
| { | ||
| const auto elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - m_startupBeganAt).count(); | ||
| m_logger->log("PATH_TRACER_STARTUP %s_ms=%lld", ILogger::ELL_INFO, eventName, static_cast<long long>(elapsedMs)); | ||
| } | ||
|
|
||
| bool parseCommandLine() | ||
| { | ||
| argparse::ArgumentParser parser("31_hlslpathtracer"); | ||
| parser.add_argument("--pipeline-cache-dir") | ||
| .nargs(1) | ||
| .help("Override the PATH_TRACER pipeline cache root directory"); | ||
| parser.add_argument("--clear-pipeline-cache") | ||
| .help("Clear the PATH_TRACER cache root before startup") | ||
| .flag(); | ||
|
|
||
| try | ||
| { | ||
| parser.parse_args({ argv.data(), argv.data() + argv.size() }); | ||
| } | ||
| catch (const std::exception& e) | ||
| { | ||
| m_logger->log("Failed to parse arguments: %s", ILogger::ELL_ERROR, e.what()); | ||
| return false; | ||
| } | ||
|
|
||
| m_commandLine.pipelineCacheDirOverride.reset(); | ||
| if (parser.present("--pipeline-cache-dir")) | ||
| m_commandLine.pipelineCacheDirOverride = path(parser.get<std::string>("--pipeline-cache-dir")); | ||
| m_commandLine.clearPipelineCache = parser.get<bool>("--clear-pipeline-cache"); | ||
| return true; | ||
| } | ||
|
|
||
| static std::string hashToHex(const core::blake3_hash_t& hash) | ||
| { | ||
| static constexpr char digits[] = "0123456789abcdef"; | ||
| static constexpr size_t HexCharsPerByte = 2ull; | ||
| static constexpr uint32_t HighNibbleBitOffset = 4u; | ||
| static constexpr uint8_t NibbleMask = 0xfu; | ||
| const auto hashByteCount = sizeof(hash.data); | ||
| std::string retval; | ||
| retval.resize(hashByteCount * HexCharsPerByte); | ||
| for (size_t i = 0ull; i < hashByteCount; ++i) | ||
| { | ||
| const auto hexOffset = i * HexCharsPerByte; | ||
| retval[hexOffset] = digits[(hash.data[i] >> HighNibbleBitOffset) & NibbleMask]; | ||
| retval[hexOffset + 1ull] = digits[hash.data[i] & NibbleMask]; | ||
| } | ||
| return retval; | ||
| } | ||
|
|
||
| path getDefaultPipelineCacheDir() const | ||
| { | ||
| if (const auto* localAppData = std::getenv("LOCALAPPDATA"); localAppData && localAppData[0] != '\0') | ||
| return path(localAppData) / "nabla/examples/31_HLSLPathTracer/pipeline/cache"; | ||
| return localOutputCWD / "pipeline/cache"; | ||
| } | ||
|
|
||
| path getRuntimeConfigPath() const | ||
| { | ||
| return system::executableDirectory() / RuntimeConfigFilename; | ||
| } | ||
|
|
||
| std::optional<path> tryGetPipelineCacheDirFromRuntimeConfig() const | ||
| { | ||
| const auto configPath = getRuntimeConfigPath(); | ||
| if (!m_system->exists(configPath, IFile::ECF_READ)) | ||
| return std::nullopt; | ||
|
|
||
| std::ifstream input(configPath); | ||
| if (!input.is_open()) | ||
| return std::nullopt; | ||
|
|
||
| nlohmann::json json; | ||
| try | ||
| { | ||
| input >> json; | ||
| } | ||
| catch (const std::exception& e) | ||
| { | ||
| m_logger->log("Failed to parse PATH_TRACER runtime config %s: %s", ILogger::ELL_WARNING, configPath.string().c_str(), e.what()); | ||
| return std::nullopt; | ||
| } | ||
|
|
||
| const auto cacheRootIt = json.find("cache_root"); | ||
| if (cacheRootIt == json.end() || !cacheRootIt->is_string()) | ||
| return std::nullopt; | ||
|
|
||
| const auto cacheRoot = cacheRootIt->get<std::string>(); | ||
| if (cacheRoot.empty()) | ||
| return std::nullopt; | ||
|
|
||
| const path relativeRoot(cacheRoot); | ||
| if (relativeRoot.is_absolute()) | ||
| { | ||
| m_logger->log("Ignoring absolute cache_root in %s", ILogger::ELL_WARNING, configPath.string().c_str()); | ||
| return std::nullopt; | ||
| } | ||
|
|
||
| return (configPath.parent_path() / relativeRoot).lexically_normal(); | ||
| } | ||
|
|
||
| path getPipelineCacheRootDir() const | ||
| { | ||
| if (m_commandLine.pipelineCacheDirOverride.has_value()) | ||
| return m_commandLine.pipelineCacheDirOverride.value(); | ||
| if (const auto runtimeConfigDir = tryGetPipelineCacheDirFromRuntimeConfig(); runtimeConfigDir.has_value()) | ||
| return runtimeConfigDir.value(); | ||
| return getDefaultPipelineCacheDir(); | ||
| } | ||
|
|
||
| path getPipelineCacheBlobPath() const | ||
| { | ||
| const auto key = m_device->getPipelineCacheKey(); | ||
| return getPipelineCacheRootDir() / "blob" / BuildConfigName / (std::string(key.deviceAndDriverUUID) + ".bin"); | ||
| } | ||
|
|
||
| path getSpirvCacheDir() const | ||
| { | ||
| return getPipelineCacheRootDir() / "spirv" / BuildConfigName; | ||
| } | ||
|
|
||
| path getTrimmedShaderCachePath(const IShader* shader, const char* const entryPoint) const | ||
| { | ||
| core::blake3_hasher hasher; | ||
| hasher << std::string_view(shader ? shader->getFilepathHint() : std::string_view{}); | ||
| hasher << std::string_view(entryPoint); | ||
| return getSpirvCacheDir() / (hashToHex(static_cast<core::blake3_hash_t>(hasher)) + ".spv"); | ||
| } | ||
|
|
||
| path getValidatedSpirvMarkerPath(const ICPUBuffer* spirvBuffer) const | ||
| { | ||
| IGPUComputePipeline* pipeline; | ||
| if (guiControlled.useRWMC) | ||
| pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[guiControlled.PTPipeline].get() : m_PTHLSLPipelinesRWMC[guiControlled.PTPipeline].get(); | ||
| else | ||
| pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelines[guiControlled.PTPipeline].get() : m_PTHLSLPipelines[guiControlled.PTPipeline].get(); | ||
| auto contentHash = spirvBuffer->getContentHash(); | ||
| if (contentHash == ICPUBuffer::INVALID_HASH) | ||
| contentHash = spirvBuffer->computeContentHash(); | ||
| return getSpirvCacheDir() / (hashToHex(contentHash) + ".hash"); | ||
| } | ||
|
|
||
| size_t getBackgroundPipelineBuildBudget() const | ||
| { | ||
| static constexpr uint32_t ReservedForegroundThreadCount = 1u; | ||
| const auto concurrency = std::thread::hardware_concurrency(); | ||
| if (concurrency > ReservedForegroundThreadCount) | ||
| return static_cast<size_t>(concurrency - ReservedForegroundThreadCount); | ||
| return ReservedForegroundThreadCount; | ||
| } | ||
|
|
||
| return pipeline; | ||
| bool ensureCacheDirectoryExists(const path& dir, const char* const description) | ||
| { | ||
| if (dir.empty() || m_system->isDirectory(dir)) | ||
| return true; | ||
|
|
||
| if (m_system->createDirectory(dir) || m_system->isDirectory(dir)) | ||
| return true; | ||
|
|
||
| m_logger->log("Failed to create %s %s", ILogger::ELL_WARNING, description, dir.string().c_str()); | ||
| return false; | ||
| } | ||
|
|
||
| bool finalizeCacheFile(const path& tempPath, const path& finalPath, const char* const description) | ||
| { | ||
| m_system->deleteFile(finalPath); | ||
| const auto ec = m_system->moveFileOrDirectory(tempPath, finalPath); | ||
| if (!ec) | ||
| return true; | ||
|
|
||
| m_system->deleteFile(tempPath); | ||
| m_logger->log("Failed to finalize %s %s", ILogger::ELL_WARNING, description, finalPath.string().c_str()); | ||
| return false; | ||
| } | ||
|
|
||
| void initializePipelineCache() | ||
| { | ||
| m_pipelineCache.blobPath = getPipelineCacheBlobPath(); | ||
| m_pipelineCache.trimmedShaders.rootDir = getSpirvCacheDir(); | ||
| m_pipelineCache.trimmedShaders.validationDir = getSpirvCacheDir(); | ||
| if (!m_pipelineCache.trimmedShaders.trimmer) | ||
| m_pipelineCache.trimmedShaders.trimmer = core::make_smart_refctd_ptr<asset::ISPIRVEntryPointTrimmer>(); | ||
| const auto pipelineCacheRootDir = getPipelineCacheRootDir(); | ||
| std::error_code ec; | ||
| m_pipelineCache.loadedBytes = 0ull; | ||
| m_pipelineCache.loadedFromDisk = false; | ||
| m_pipelineCache.clearedOnStartup = m_commandLine.clearPipelineCache; | ||
| m_pipelineCache.newlyReadyPipelinesSinceLastSave = 0ull; | ||
| m_pipelineCache.checkpointedAfterFirstSubmit = false; | ||
| m_pipelineCache.lastSaveAt = clock_t::now(); | ||
| if (m_commandLine.clearPipelineCache) | ||
| { | ||
| if (m_system->isDirectory(pipelineCacheRootDir) && !m_system->deleteDirectory(pipelineCacheRootDir)) | ||
| m_logger->log("Failed to clear pipeline cache directory %s", ILogger::ELL_WARNING, pipelineCacheRootDir.string().c_str()); | ||
| else | ||
| m_logger->log("PATH_TRACER_PIPELINE_CACHE clear root=%s", ILogger::ELL_INFO, pipelineCacheRootDir.string().c_str()); | ||
| } | ||
| ensureCacheDirectoryExists(m_pipelineCache.blobPath.parent_path(), "pipeline cache directory"); | ||
| ensureCacheDirectoryExists(m_pipelineCache.trimmedShaders.rootDir, "trimmed shader cache directory"); | ||
| ensureCacheDirectoryExists(m_pipelineCache.trimmedShaders.validationDir, "validated shader cache directory"); | ||
|
|
||
| std::vector<uint8_t> initialData; | ||
| { | ||
| std::ifstream input(m_pipelineCache.blobPath, std::ios::binary | std::ios::ate); | ||
| if (input.is_open()) | ||
| { | ||
| const auto size = input.tellg(); | ||
| if (size > 0) | ||
| { | ||
| initialData.resize(static_cast<size_t>(size)); | ||
| input.seekg(0, std::ios::beg); | ||
| input.read(reinterpret_cast<char*>(initialData.data()), static_cast<std::streamsize>(initialData.size())); | ||
| if (!input) | ||
| initialData.clear(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| std::span<const uint8_t> initialDataSpan = {}; | ||
| if (!initialData.empty()) | ||
| { | ||
| initialDataSpan = { initialData.data(), initialData.size() }; | ||
| m_pipelineCache.loadedBytes = initialData.size(); | ||
| m_pipelineCache.loadedFromDisk = true; | ||
| } | ||
|
|
||
| m_pipelineCache.object = m_device->createPipelineCache(initialDataSpan); | ||
| if (!m_pipelineCache.object && !initialData.empty()) | ||
| { | ||
| m_logger->log("Pipeline cache blob at %s was rejected. Falling back to empty cache.", ILogger::ELL_WARNING, m_pipelineCache.blobPath.string().c_str()); | ||
| m_pipelineCache.object = m_device->createPipelineCache(std::span<const uint8_t>{}); | ||
| } | ||
| if (!m_pipelineCache.object) | ||
| { | ||
| m_logger->log("Failed to create PATH_TRACER pipeline cache.", ILogger::ELL_WARNING); | ||
| return; | ||
| } | ||
|
|
||
| m_pipelineCache.object->setObjectDebugName("PATH_TRACER Pipeline Cache"); | ||
| m_logger->log("PATH_TRACER pipeline cache path: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); | ||
| m_logger->log("PATH_TRACER trimmed shader cache path: %s", ILogger::ELL_INFO, m_pipelineCache.trimmedShaders.rootDir.string().c_str()); | ||
| m_logger->log("PATH_TRACER validated shader cache path: %s", ILogger::ELL_INFO, m_pipelineCache.trimmedShaders.validationDir.string().c_str()); | ||
| m_logger->log( | ||
| "PATH_TRACER_PIPELINE_CACHE init clear=%u loaded_from_disk=%u loaded_bytes=%zu path=%s", | ||
| ILogger::ELL_INFO, | ||
| m_pipelineCache.clearedOnStartup ? 1u : 0u, | ||
| m_pipelineCache.loadedFromDisk ? 1u : 0u, | ||
| m_pipelineCache.loadedBytes, | ||
| m_pipelineCache.blobPath.string().c_str() | ||
| ); | ||
| if (!initialData.empty()) | ||
| m_logger->log("Loaded PATH_TRACER pipeline cache blob: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); | ||
| } | ||
|
|
||
| smart_refctd_ptr<IShader> tryLoadTrimmedShaderFromDisk(const IShader* sourceShader, const char* const entryPoint) | ||
| { | ||
| const auto cachePath = getTrimmedShaderCachePath(sourceShader, entryPoint); | ||
| std::ifstream input(cachePath, std::ios::binary | std::ios::ate); | ||
| if (!input.is_open()) | ||
| return nullptr; | ||
|
|
||
| const auto size = input.tellg(); | ||
| if (size <= 0) | ||
| return nullptr; | ||
|
|
||
| std::vector<uint8_t> bytes(static_cast<size_t>(size)); | ||
| input.seekg(0, std::ios::beg); | ||
| input.read(reinterpret_cast<char*>(bytes.data()), static_cast<std::streamsize>(bytes.size())); | ||
| if (!input) | ||
| return nullptr; | ||
|
|
||
| auto buffer = ICPUBuffer::create({ { bytes.size() }, bytes.data() }); | ||
| if (!buffer) | ||
| return nullptr; | ||
| buffer->setContentHash(buffer->computeContentHash()); | ||
| { | ||
| std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); | ||
| m_pipelineCache.trimmedShaders.loadedBytes += bytes.size(); | ||
| ++m_pipelineCache.trimmedShaders.loadedFromDiskCount; | ||
| } | ||
| m_logger->log( | ||
| "PATH_TRACER_SHADER_CACHE load entrypoint=%s bytes=%zu path=%s", | ||
| ILogger::ELL_INFO, | ||
| entryPoint, | ||
| bytes.size(), | ||
| cachePath.string().c_str() | ||
| ); | ||
| return core::make_smart_refctd_ptr<IShader>(std::move(buffer), IShader::E_CONTENT_TYPE::ECT_SPIRV, std::string(sourceShader->getFilepathHint())); | ||
| } | ||
|
|
||
| bool hasValidatedSpirvMarker(const ICPUBuffer* spirvBuffer) const | ||
| { | ||
| return m_system->exists(getValidatedSpirvMarkerPath(spirvBuffer), IFile::ECF_READ); | ||
| } | ||
|
|
||
| void saveValidatedSpirvMarker(const ICPUBuffer* spirvBuffer) | ||
| { | ||
| const auto markerPath = getValidatedSpirvMarkerPath(spirvBuffer); | ||
| if (!ensureCacheDirectoryExists(markerPath.parent_path(), "validated shader cache directory")) | ||
| return; | ||
|
|
||
| auto tempPath = markerPath; | ||
| tempPath += ".tmp"; | ||
| { | ||
| std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); | ||
| if (!output.is_open()) | ||
| { | ||
| m_logger->log("Failed to open validated shader marker temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| output << "ok\n"; | ||
| output.flush(); | ||
| if (!output) | ||
| { | ||
| output.close(); | ||
| m_system->deleteFile(tempPath); | ||
| m_logger->log("Failed to write validated shader marker %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| finalizeCacheFile(tempPath, markerPath, "validated shader marker"); | ||
| } | ||
|
|
||
| bool ensurePreparedShaderValidated(const smart_refctd_ptr<IShader>& preparedShader) | ||
| { | ||
| if (!preparedShader) | ||
| return false; | ||
|
|
||
| auto* const content = preparedShader->getContent(); | ||
| if (!content) | ||
| return false; | ||
|
|
||
| if (hasValidatedSpirvMarker(content)) | ||
| { | ||
| m_pipelineCache.trimmedShaders.trimmer->markValidated(content); | ||
| return true; | ||
| } | ||
|
|
||
| if (!m_pipelineCache.trimmedShaders.trimmer->ensureValidated(content, m_logger.get())) | ||
| return false; | ||
|
|
||
| saveValidatedSpirvMarker(content); | ||
| return true; | ||
| } | ||
|
|
||
| void saveTrimmedShaderToDisk(const IShader* shader, const char* const entryPoint, const path& cachePath) | ||
| { | ||
| const auto* content = shader->getContent(); | ||
| if (!content || !content->getPointer() || cachePath.empty()) | ||
| return; | ||
|
|
||
| if (!ensureCacheDirectoryExists(cachePath.parent_path(), "trimmed shader cache directory")) | ||
| return; | ||
|
|
||
| auto tempPath = cachePath; | ||
| tempPath += ".tmp"; | ||
| { | ||
| std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); | ||
| if (!output.is_open()) | ||
| { | ||
| m_logger->log("Failed to open trimmed shader cache temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| output.write(reinterpret_cast<const char*>(content->getPointer()), static_cast<std::streamsize>(content->getSize())); | ||
| output.flush(); | ||
| if (!output) | ||
| { | ||
| output.close(); | ||
| m_system->deleteFile(tempPath); | ||
| m_logger->log("Failed to write trimmed shader cache blob to %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| if (!finalizeCacheFile(tempPath, cachePath, "trimmed shader cache blob")) | ||
| return; | ||
|
|
||
| { | ||
| std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); | ||
| m_pipelineCache.trimmedShaders.savedBytes += content->getSize(); | ||
| ++m_pipelineCache.trimmedShaders.savedToDiskCount; | ||
| } | ||
| m_logger->log( | ||
| "PATH_TRACER_SHADER_CACHE save entrypoint=%s bytes=%zu path=%s", | ||
| ILogger::ELL_INFO, | ||
| entryPoint, | ||
| content->getSize(), | ||
| cachePath.string().c_str() | ||
| ); | ||
| } | ||
|
|
||
//! Returns a SPIR-V module trimmed down to a single compute entry point,
//! consulting three cache layers in order: in-memory map, on-disk blob,
//! and finally a fresh trim. Non-SPIR-V (or null) modules pass through
//! untouched. Returns nullptr only if the prepared module fails validation.
//! Thread-safe: every access to the shared runtime map/counters is taken
//! under trimmedShaders.mutex (held only for the map/counter operation,
//! never across the trim or disk I/O).
smart_refctd_ptr<IShader> getPreparedShaderForEntryPoint(const smart_refctd_ptr<IShader>& shaderModule, const char* const entryPoint)
{
	// Only SPIR-V content can be trimmed; anything else is returned as-is.
	if (!shaderModule || shaderModule->getContentType() != IShader::E_CONTENT_TYPE::ECT_SPIRV)
		return shaderModule;

	// The on-disk cache path doubles as the in-memory cache key, so both
	// layers agree on identity (module content + entry point).
	const auto cachePath = getTrimmedShaderCachePath(shaderModule.get(), entryPoint);
	const auto cacheKey = cachePath.string();
	{
		// Fast path: a previous call already produced this variant.
		std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex);
		const auto found = m_pipelineCache.trimmedShaders.runtimeShaders.find(cacheKey);
		if (found != m_pipelineCache.trimmedShaders.runtimeShaders.end())
			return found->second;
	}

	const auto startedAt = clock_t::now();
	// Second layer: a trimmed blob persisted by an earlier run.
	auto preparedShader = tryLoadTrimmedShaderFromDisk(shaderModule.get(), entryPoint);
	bool cameFromDisk = static_cast<bool>(preparedShader);
	bool wasTrimmed = false;
	if (!preparedShader)
	{
		// Slow path: trim the module to just this compute entry point.
		const core::set entryPoints = { asset::ISPIRVEntryPointTrimmer::EntryPoint{ .name = entryPoint, .stage = hlsl::ShaderStage::ESS_COMPUTE } };
		const auto result = m_pipelineCache.trimmedShaders.trimmer->trim(shaderModule->getContent(), entryPoints, nullptr);
		if (!result)
		{
			// Trimming is an optimization; on failure fall back to the full module.
			m_logger->log("Failed to prepare trimmed PATH_TRACER shader for %s. Falling back to the original module.", ILogger::ELL_WARNING, entryPoint);
			return shaderModule;
		}
		if (result.spirv)
		{
			// Rehash so downstream content-addressed lookups see the trimmed bytes.
			result.spirv->setContentHash(result.spirv->computeContentHash());
			preparedShader = core::make_smart_refctd_ptr<IShader>(core::smart_refctd_ptr(result.spirv), IShader::E_CONTENT_TYPE::ECT_SPIRV, std::string(shaderModule->getFilepathHint()));
		}
		else
			// Trimmer reported success but produced no new SPIR-V — module unchanged.
			preparedShader = shaderModule;

		// Persist for the next run before publishing to the in-memory map.
		saveTrimmedShaderToDisk(preparedShader.get(), entryPoint, cachePath);
		{
			std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex);
			++m_pipelineCache.trimmedShaders.generatedCount;
		}
		wasTrimmed = (preparedShader != shaderModule);
	}

	if (!ensurePreparedShaderValidated(preparedShader))
	{
		m_logger->log("Prepared PATH_TRACER shader for %s is not valid SPIR-V", ILogger::ELL_ERROR, entryPoint);
		return nullptr;
	}

	{
		// Publish; if another thread raced us here, keep its entry so all
		// callers share one module instance (our duplicate work is discarded).
		std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex);
		const auto [it, inserted] = m_pipelineCache.trimmedShaders.runtimeShaders.emplace(cacheKey, preparedShader);
		if (!inserted)
			preparedShader = it->second;
	}

	const auto wallMs = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - startedAt).count();
	m_logger->log(
		"PATH_TRACER_SHADER_CACHE ready entrypoint=%s wall_ms=%lld from_disk=%u trimmed=%u",
		ILogger::ELL_INFO,
		entryPoint,
		static_cast<long long>(wallMs),
		cameFromDisk ? 1u : 0u,
		wasTrimmed ? 1u : 0u
	);
	return preparedShader;
}
|
|
||
| void savePipelineCache() | ||
| { | ||
| if (!m_pipelineCache.object || !m_pipelineCache.dirty || m_pipelineCache.blobPath.empty()) | ||
| return; | ||
|
|
||
| const auto saveStartedAt = clock_t::now(); | ||
| auto cpuCache = m_pipelineCache.object->convertToCPUCache(); | ||
| if (!cpuCache) | ||
| return; | ||
|
|
||
| const auto& entries = cpuCache->getEntries(); | ||
| const auto found = entries.find(m_device->getPipelineCacheKey()); | ||
| if (found == entries.end() || !found->second.bin || found->second.bin->empty()) | ||
| return; | ||
|
|
||
| if (!ensureCacheDirectoryExists(m_pipelineCache.blobPath.parent_path(), "pipeline cache directory")) | ||
| return; | ||
|
|
||
| auto tempPath = m_pipelineCache.blobPath; | ||
| tempPath += ".tmp"; | ||
| { | ||
| std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); | ||
| if (!output.is_open()) | ||
| { | ||
| m_logger->log("Failed to open pipeline cache temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| output.write(reinterpret_cast<const char*>(found->second.bin->data()), static_cast<std::streamsize>(found->second.bin->size())); | ||
| output.flush(); | ||
| if (!output) | ||
| { | ||
| output.close(); | ||
| m_system->deleteFile(tempPath); | ||
| m_logger->log("Failed to write pipeline cache blob to %s", ILogger::ELL_WARNING, tempPath.string().c_str()); | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| if (!finalizeCacheFile(tempPath, m_pipelineCache.blobPath, "pipeline cache blob")) | ||
| return; | ||
|
|
||
| m_pipelineCache.dirty = false; | ||
| m_pipelineCache.savedBytes = found->second.bin->size(); | ||
| m_pipelineCache.newlyReadyPipelinesSinceLastSave = 0ull; | ||
| m_pipelineCache.lastSaveAt = clock_t::now(); | ||
| const auto saveElapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - saveStartedAt).count(); | ||
| m_logger->log( | ||
| "PATH_TRACER_PIPELINE_CACHE save bytes=%zu wall_ms=%lld path=%s", | ||
| ILogger::ELL_INFO, | ||
| m_pipelineCache.savedBytes, | ||
| static_cast<long long>(saveElapsedMs), | ||
| m_pipelineCache.blobPath.string().c_str() | ||
| ); | ||
| m_logger->log("Saved PATH_TRACER pipeline cache blob: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); | ||
| } | ||
|
|
||
| void maybeCheckpointPipelineCache() | ||
| { | ||
| if (!m_pipelineCache.object || !m_pipelineCache.dirty) | ||
| return; | ||
|
|
||
| if (m_startupLog.loggedFirstRenderSubmit && !m_pipelineCache.checkpointedAfterFirstSubmit) | ||
| { | ||
| savePipelineCache(); | ||
| m_pipelineCache.checkpointedAfterFirstSubmit = true; | ||
| return; | ||
| } | ||
|
|
||
| if (!m_pipelineCache.warmup.started || m_pipelineCache.warmup.loggedComplete) | ||
| return; | ||
|
|
||
| static constexpr size_t WarmupCheckpointThreshold = 4ull; | ||
| if (m_pipelineCache.newlyReadyPipelinesSinceLastSave < WarmupCheckpointThreshold) | ||
| return; | ||
|
|
||
| const auto elapsedSinceLastSave = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - m_pipelineCache.lastSaveAt).count(); | ||
| if (elapsedSinceLastSave < 1000ll) | ||
| return; | ||
|
|
||
| savePipelineCache(); | ||
| } | ||
|
|
||
//! Maps the requested light geometry plus feature toggles (persistent
//! workgroups, reweighted Monte Carlo) to the matching precompiled SPIR-V
//! variant baked in at build time. Returns nullptr for unknown geometry.
smart_refctd_ptr<IShader> loadRenderShader(const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const bool rwmc)
{
	switch (geometry)
	{
	case ELG_SPHERE:
		// NOTE(review): `persistentWorkGroups` is ignored for spheres —
		// presumably no persistent sphere variant is packaged; confirm intentional.
		if (rwmc)
			return loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.sphere.rwmc")>();
		return loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.sphere")>();
	case ELG_TRIANGLE:
		// Triangles honor both toggles: rwmc x {persistent, linear}.
		if (rwmc)
			return persistentWorkGroups ?
				loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.triangle.rwmc.persistent")>() :
				loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.triangle.rwmc.linear")>();
		return persistentWorkGroups ?
			loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.triangle.persistent")>() :
			loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.triangle.linear")>();
	case ELG_RECTANGLE:
		if (rwmc)
			return persistentWorkGroups ?
				loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.rectangle.rwmc.persistent")>() :
				loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.rectangle.rwmc.linear")>();
		// NOTE(review): non-RWMC rectangles ignore `persistentWorkGroups`
		// while the RWMC branch honors it — verify this asymmetry is deliberate.
		return loadPrecompiledShader<NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("pt.compute.rectangle")>();
	default:
		return nullptr;
	}
}
There was a problem hiding this comment.
can you:
- move this 1500 LoC addition to its own file
- make as much of this reusable between the examples ?
| pipeline_future_t requestComputePipelineBuild(smart_refctd_ptr<IShader> shaderModule, IGPUPipelineLayout* const pipelineLayout, const char* const entryPoint) | ||
| { | ||
| if (!shaderModule) | ||
| return {}; | ||
|
|
||
| return std::async( | ||
| std::launch::async, | ||
| [ | ||
| this, | ||
| device = m_device, | ||
| pipelineCache = m_pipelineCache.object, | ||
| shader = std::move(shaderModule), | ||
| layout = smart_refctd_ptr<IGPUPipelineLayout>(pipelineLayout), | ||
| requiredSubgroupSize = m_requiredSubgroupSize, | ||
| logger = m_logger.get(), | ||
| entryPointName = std::string(entryPoint), | ||
| cacheLoadedFromDisk = m_pipelineCache.loadedFromDisk | ||
| ]() -> smart_refctd_ptr<IGPUComputePipeline> | ||
| { | ||
| const auto startedAt = clock_t::now(); | ||
| auto preparedShader = getPreparedShaderForEntryPoint(shader, entryPointName.c_str()); | ||
| if (!preparedShader) | ||
| return nullptr; | ||
| smart_refctd_ptr<IGPUComputePipeline> pipeline; | ||
| IGPUComputePipeline::SCreationParams params = {}; | ||
| params.layout = layout.get(); | ||
| params.shader.shader = preparedShader.get(); | ||
| params.shader.entryPoint = entryPointName.c_str(); | ||
| params.shader.entries = nullptr; | ||
| params.cached.requireFullSubgroups = true; | ||
| params.shader.requiredSubgroupSize = requiredSubgroupSize; | ||
| if (!device->createComputePipelines(pipelineCache.get(), { ¶ms, 1 }, &pipeline)) | ||
| { | ||
| if (logger) | ||
| logger->log("Failed to create precompiled path tracing pipeline for %s", ILogger::ELL_ERROR, entryPointName.c_str()); | ||
| return nullptr; | ||
| } | ||
| if (logger) | ||
| { | ||
| const auto wallMs = std::chrono::duration_cast<std::chrono::milliseconds>(clock_t::now() - startedAt).count(); | ||
| logger->log( | ||
| "PATH_TRACER_PIPELINE_BUILD entrypoint=%s wall_ms=%lld cache_loaded_from_disk=%u", | ||
| ILogger::ELL_INFO, | ||
| entryPointName.c_str(), | ||
| static_cast<long long>(wallMs), | ||
| cacheLoadedFromDisk ? 1u : 0u | ||
| ); | ||
| } | ||
| return pipeline; | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| void pollPendingPipeline(pipeline_future_t& future, smart_refctd_ptr<IGPUComputePipeline>& pipeline) | ||
| { | ||
| if (!future.valid() || pipeline) | ||
| return; | ||
| if (future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) | ||
| return; | ||
| pipeline = future.get(); | ||
| if (pipeline) | ||
| { | ||
| m_pipelineCache.dirty = true; | ||
| ++m_pipelineCache.newlyReadyPipelinesSinceLastSave; | ||
| } | ||
| } |
There was a problem hiding this comment.
make something like a CachedPipelinesManager in the examples_common
# Conflicts: # 31_HLSLPathTracer/main.cpp
Summary
`--pipeline-cache-dir`, `--clear-pipeline-cache`, and a generated `path_tracer.runtime.json` that resolves a relative `pipeline/cache` root from the common bin directory and falls back to `LocalAppData` outside the CMake flow; shared helpers live under `examples_tests/common/include/nbl/examples/common`. Note on shape
A noticeable part of the current packaged-SPIR-V wiring exists because this branch cannot assume Devsh-Graphics-Programming/Nabla#988 is merged. If that PR lands, a large part of this glue can move out of the example and the packaged SPIR-V setup can be reduced materially.
Root cause
The base EX31 path had two separate problems.
First, EX31 started as a runtime-oriented example in
eab0f70cand2f77555ce. Shader selection and compute pipeline creation lived in runtime from the start. That runtime matrix then expanded with persistent workgroups in153556152and with RWMC in3d206fd4. The current line locations inmain.cppcome from later refactors, but the semantic shape predates them.Second, once EX31 is moved to packaged SPIR-V, startup repays pipeline creation unless those packaged variants share a real pipeline cache and the prepared SPIR-V path avoids revalidating the same blob every run. The base render and resolve compute pipeline creation sites pass
nullptrcache inmain.cpp#L404-L478. That runtime creation model originates in2f77555ceand was widened by153556152and3d206fd4.Triangle polygon-method selection also needs to stay compile-time in the packaged path. Making that choice runtime changes the shader execution model and obscures whether later backend wins come from DXC/SPIR-V work or from changing the rendering path itself.
Validation
Validation was run on AMD Ryzen 5 5600G with Radeon Graphics (6C/12T).
Current local validation on the final paired state:
nsc, builtinsOFF:31_hlslpathtracerSPIRVbuild OK,31_hlslpathtracerbuild OK, warmfirst_render_submit_ms=1533nsc, builtinsON:31_hlslpathtracerSPIRVbuild OK,31_hlslpathtracerbuild OK, coldfirst_render_submit_ms=2779, warmfirst_render_submit_ms=1850ONbackground warmup after the first submit:queued_jobs=21,max_parallel=11, warmup wall1625 ms--pipeline-cache-dir <path>and--clear-pipeline-cacheRelWithDebInfo; the generatedpath_tracer.runtime.jsonresolvespipeline/cacherelative to the common bin directory