diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d162e0e..282faecc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,18 +80,10 @@ endmacro() message(STATUS "General purpose GPU compute framework built on Vulkan") message(STATUS "=======================================================") -# Enable or disable targets -kompute_option(KOMPUTE_OPT_BUILD_TESTS "Enable if you want to build tests." OFF) -kompute_option(KOMPUTE_OPT_CODE_COVERAGE "Enable if you want code coverage." OFF) -kompute_option(KOMPUTE_OPT_BUILD_DOCS "Enable if you want to build documentation." OFF) -kompute_option(KOMPUTE_OPT_INSTALL "Enable if you want to enable installation." OFF) - # Build options -kompute_option(KOMPUTE_OPT_BUILD_PYTHON "Enable if you want to build python bindings." OFF) -kompute_log_level(KOMPUTE_OPT_LOG_LEVEL "Internally we use Spdlog or fmt for logging, depending on the value of 'KOMPUTE_OPT_USE_SPDLOG'. The log level used can be changed here. Possible values: 'Trace', 'Debug', 'Info', 'Warn', 'Error', 'Critical', 'Off', 'Default'. If set to 'Off' logging will be deactivated completely. If set to 'Default', the log level will be set to 'Info' for release builds and 'Debug' else." "Default") +kompute_log_level(KOMPUTE_OPT_LOG_LEVEL "Internally we use Spdlog or fmt for logging, depending on the value of 'KOMPUTE_OPT_USE_SPDLOG'. The log level used can be changed here. Possible values: 'Trace', 'Debug', 'Info', 'Warn', 'Error', 'Critical', 'Off', 'Default'. If set to 'Off' logging will be deactivated completely. If set to 'Default', the log level will be set to 'Info' for release builds and 'Debug' else." "Off") kompute_option(KOMPUTE_OPT_USE_SPDLOG "If enabled, logging via KP_LOG_ will happen through Spdlog instead of plan fmt." OFF) -kompute_option(KOMPUTE_OPT_ANDROID_BUILD "Enable android compilation flags required." OFF) -kompute_option(KOMPUTE_OPT_DISABLE_VK_DEBUG_LAYERS "Explicitly disable debug layers even on debug." 
OFF) +kompute_option(KOMPUTE_OPT_DISABLE_VK_DEBUG_LAYERS "Explicitly disable debug layers even on debug." ON) kompute_option(KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK "Whether to check if your driver supports the Vulkan Header version you are linking against. This might be useful in case you build shared on a different system than you run later." OFF) kompute_option(KOMPUTE_OPT_BUILD_SHADERS "Rebuilds all compute shaders during compilation and does not use the already precompiled versions. Requires glslangValidator to be installed on your system." OFF) @@ -99,8 +91,6 @@ kompute_option(KOMPUTE_OPT_BUILD_SHADERS "Rebuilds all compute shaders during co kompute_option(KOMPUTE_OPT_USE_BUILT_IN_SPDLOG "Use the built-in version of Spdlog. Requires 'KOMPUTE_OPT_USE_SPDLOG' to be set to ON in order to have any effect." ON) kompute_option(KOMPUTE_OPT_SPDLOG_ASYNC_MODE "If spdlog is enabled this allows for selecting whether the default logger setup creates sync or async logger" OFF) kompute_option(KOMPUTE_OPT_USE_BUILT_IN_FMT "Use the built-in version of fmt." ON) -kompute_option(KOMPUTE_OPT_USE_BUILT_IN_GOOGLE_TEST "Use the built-in version of GoogleTest." ON) -kompute_option(KOMPUTE_OPT_USE_BUILT_IN_PYBIND11 "Use the built-in version of pybind11." ON) kompute_option(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER "Use the built-in version of Vulkan Headers. This could be helpful in case your system Vulkan Headers are too new for your driver. If you set this to OFF, please make sure your system Vulkan Headers are supported by your driver." ON) kompute_option_string(KOMPUTE_OPT_BUILT_IN_VULKAN_HEADER_TAG "The git tag used for the built-in Vulkan Headers when 'KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER' is enabled. 
A list of tags can be found here: https://github.com/KhronosGroup/Vulkan-Headers/tags" "v1.3.231") message(STATUS "=======================================================") @@ -118,41 +108,28 @@ include(cmake/check_vulkan_version.cmake) include(FetchContent) # Vulkan Header -# We don't import Vulkan library if Android build as it is built dynamically -# Otherwise it is expected that the Vulkan SDK and dependencies are installed -# Has to happen AFTER using the build-in Vulkan headers to prevent multiple targets with the name Vulkan::Headers -if(KOMPUTE_OPT_ANDROID_BUILD) - add_library(vulkanAndroid INTERFACE) - set(VULKAN_INCLUDE_DIR ${ANDROID_NDK}/sources/third_party/vulkan/src/include) - target_sources(vulkanAndroid INTERFACE ${VULKAN_INCLUDE_DIR}/vulkan/vulkan.hpp) - target_include_directories(vulkanAndroid INTERFACE ${VULKAN_INCLUDE_DIR}) - - target_compile_definitions(vulkanAndroid INTERFACE VK_NO_PROTOTYPES=1) - target_compile_definitions(vulkanAndroid INTERFACE VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) -else() - if(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER) - FetchContent_Declare(vulkan_header GIT_REPOSITORY https://github.com/KhronosGroup/Vulkan-Headers.git - GIT_TAG ${KOMPUTE_OPT_BUILT_IN_VULKAN_HEADER_TAG}) # Source: https://github.com/KhronosGroup/Vulkan-Headers/tags - FetchContent_MakeAvailable(vulkan_header) - - if(NOT KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK) - # Ensure the driver supports this Vulkan version - check_vulkan_version(INCLUDE_DIR "${vulkan_header_SOURCE_DIR}/include") - endif() +if(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER) + FetchContent_Declare(vulkan_header GIT_REPOSITORY https://github.com/KhronosGroup/Vulkan-Headers.git + GIT_TAG ${KOMPUTE_OPT_BUILT_IN_VULKAN_HEADER_TAG}) # Source: https://github.com/KhronosGroup/Vulkan-Headers/tags + FetchContent_MakeAvailable(vulkan_header) + + if(NOT KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK) + # Ensure the driver supports this Vulkan version + check_vulkan_version(INCLUDE_DIR 
"${vulkan_header_SOURCE_DIR}/include") endif() +endif() - find_package(Vulkan REQUIRED) +find_package(Vulkan REQUIRED) - if(Vulkan_FOUND AND NOT TARGET Vulkan::Headers) - add_library(Vulkan::Headers INTERFACE IMPORTED) - set_target_properties(Vulkan::Headers PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${Vulkan_INCLUDE_DIRS}") - endif() +if(Vulkan_FOUND AND NOT TARGET Vulkan::Headers) + add_library(Vulkan::Headers INTERFACE IMPORTED) + set_target_properties(Vulkan::Headers PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${Vulkan_INCLUDE_DIRS}") +endif() - if(NOT KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER AND NOT KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK) - # Ensure the driver supports this Vulkan version - check_vulkan_version(INCLUDE_DIR ${Vulkan_INCLUDE_DIR}) - endif() +if(NOT KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER AND NOT KOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK) + # Ensure the driver supports this Vulkan version + check_vulkan_version(INCLUDE_DIR ${Vulkan_INCLUDE_DIR}) endif() # Spdlog @@ -161,7 +138,6 @@ if(KOMPUTE_OPT_USE_SPDLOG) if(NOT KOMPUTE_OPT_LOG_LEVEL_DISABLED) if(KOMPUTE_OPT_USE_BUILT_IN_SPDLOG) - set(SPDLOG_INSTALL ${KOMPUTE_OPT_INSTALL}) set(SPDLOG_BUILD_SHARED ${BUILD_SHARED_LIBS}) FetchContent_Declare(spdlog GIT_REPOSITORY https://github.com/gabime/spdlog.git @@ -174,87 +150,28 @@ if(KOMPUTE_OPT_USE_SPDLOG) endif() # fmt -if(KOMPUTE_OPT_USE_BUILT_IN_FMT) - set(FMT_INSTALL ${KOMPUTE_OPT_INSTALL}) - FetchContent_Declare(fmt GIT_REPOSITORY https://github.com/fmtlib/fmt.git - GIT_TAG 8.1.1) # Source: https://github.com/fmtlib/fmt/releases - FetchContent_MakeAvailable(fmt) -else() - find_package(fmt REQUIRED) -endif() - -# GoogleTest -if(KOMPUTE_OPT_BUILD_TESTS) - if(KOMPUTE_OPT_USE_BUILT_IN_GOOGLE_TEST) - FetchContent_Declare(googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.11.0) # Source: https://github.com/google/googletest/releases - - # Use a shared C runtime in case we build shared - set(gtest_force_shared_crt ON CACHE 
BOOL "" FORCE) - FetchContent_MakeAvailable(googletest) - - add_library(gtest_int INTERFACE) - target_link_libraries(gtest_int INTERFACE gtest) - target_include_directories(gtest_int INTERFACE ${googletest_SOURCE_DIR}/include) - - add_library(GTest::GTest ALIAS gtest_int) - - # Group under the "tests/gtest" project folder in IDEs such as Visual Studio. - set_property(TARGET gtest PROPERTY FOLDER "tests/gtest") - set_property(TARGET gtest_main PROPERTY FOLDER "tests/gtest") +if(NOT TARGET fmt::fmt) + if(KOMPUTE_OPT_USE_BUILT_IN_FMT) + FetchContent_Declare(fmt GIT_REPOSITORY https://github.com/fmtlib/fmt.git + GIT_TAG 10.0.0) # Source: https://github.com/fmtlib/fmt/releases + FetchContent_MakeAvailable(fmt) else() - find_package(GTest CONFIG REQUIRED) + find_package(fmt REQUIRED) endif() endif() -# pybind11 -if(KOMPUTE_OPT_BUILD_PYTHON) - if(KOMPUTE_OPT_USE_BUILT_IN_PYBIND11) - FetchContent_Declare(pybind GIT_REPOSITORY https://github.com/pybind/pybind11.git - GIT_TAG v2.9.2) # Source: https://github.com/pybind/pybind11/releases - FetchContent_MakeAvailable(pybind) - else() - find_package(pybind11 REQUIRED) - endif() - - find_package(PythonLibs REQUIRED) -endif() +add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) # #################################################### # Preprocessor Macros # #################################################### -if(KOMPUTE_OPT_ANDROID_BUILD) - add_compile_definitions(VK_USE_PLATFORM_ANDROID_KHR=1) -endif() - -if(KOMPUTE_OPT_BUILD_PYTHON) - add_compile_definitions(KOMPUTE_BUILD_PYTHON=1) -endif() - if(KOMPUTE_OPT_DISABLE_VK_DEBUG_LAYERS) add_compile_definitions(KOMPUTE_DISABLE_VK_DEBUG_LAYERS=1) endif() -# #################################################### -# Misc Options -# #################################################### -if(KOMPUTE_OPT_INSTALL) - # Enable install parameters for glslang (overrides parameters passed) - # When install is enabled the glslang libraries become shared - set(ENABLE_GLSLANG_INSTALL ON CACHE 
BOOL "Enables install of glslang" FORCE) -endif() - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic -Werror") -endif() - -if(KOMPUTE_OPT_CODE_COVERAGE) - if(NOT UNIX) - message(FATAL_ERROR "KOMPUTE_OPT_CODE_COVERAGE can only be enabled in unix based systems due to limitation on gcov.") - endif() - - include(cmake/code_coverage.cmake) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic -Werror -Wno-error=array-bounds") endif() # If glslang is cloned, then SPIRV/GlslangToSpv.h will be used instead of glslang/SPIRV/GlslangToSpv.h @@ -269,18 +186,6 @@ function(kompute_make KOMPUTE_MAKE_TARGET) COMMAND make -C ${PROJECT_SOURCE_DIR} ${KOMPUTE_MAKE_TARGET}) endfunction() -add_subdirectory(src) - -if(KOMPUTE_OPT_BUILD_TESTS) - enable_testing() - add_subdirectory(test) -endif() - -if(KOMPUTE_OPT_BUILD_DOCS) - set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/config" ${CMAKE_MODULE_PATH}) - add_subdirectory(docs) -endif() +add_executable(xxd external/bin/xxd.c) -if(KOMPUTE_OPT_BUILD_PYTHON) - add_subdirectory(python) -endif() +add_subdirectory(src) diff --git a/scripts/convert_shaders.py b/scripts/convert_shaders.py old mode 100644 new mode 100755 index 9375b670..11a3ab97 --- a/scripts/convert_shaders.py +++ b/scripts/convert_shaders.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """ Script to handle conversion of compute shaders to spirv and to headers """ diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index a354157c..b64bdc7a 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -23,14 +23,6 @@ Algorithm::isInit() void Algorithm::destroy() { - // We don't have to free memory on destroy as it's freed by the - // commandBuffer destructor if (this->mPushConstantsData) { - // free(this->mPushConstantsData); - // } - // if (this->mSpecializationConstantsData) { - // free(this->mSpecializationConstantsData); - // } - if (!this->mDevice) { KP_LOG_WARN("Kompute Algorithm destroy function reached 
with null " "Device pointer"); @@ -49,18 +41,6 @@ Algorithm::destroy() this->mPipeline = nullptr; } - if (this->mFreePipelineCache && this->mPipelineCache) { - KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache"); - if (!this->mPipelineCache) { - KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "pipeline cache but it is null"); - } - this->mDevice->destroy( - *this->mPipelineCache, - (vk::Optional)nullptr); - this->mPipelineCache = nullptr; - } - if (this->mFreePipelineLayout && this->mPipelineLayout) { KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout"); if (!this->mPipelineLayout) { @@ -85,20 +65,22 @@ Algorithm::destroy() this->mShaderModule = nullptr; } - // We don't call freeDescriptorSet as the descriptor pool is not created - // with VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT more at - // (https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#VUID-vkFreeDescriptorSets-descriptorPool-00312)) - // if (this->mFreeDescriptorSet && this->mDescriptorSet) { - // KP_LOG_DEBUG("Kompute Algorithm Freeing Descriptor Set"); - // if (!this->mDescriptorSet) { - // KP_LOG_WARN( - // "Kompute Algorithm Error requested to free descriptor set"); - // } - // this->mDevice->freeDescriptorSets( - // *this->mDescriptorPool, 1, this->mDescriptorSet.get()); - // this->mDescriptorSet = nullptr; - //} + freeParameters(); + + if (this->mPushConstantsData) { + free(this->mPushConstantsData); + this->mPushConstantsData = nullptr; + } + + if (this->mSpecializationConstantsData) { + free(this->mSpecializationConstantsData); + this->mSpecializationConstantsData = nullptr; + } +} +void +Algorithm::freeParameters() +{ if (this->mFreeDescriptorSetLayout && this->mDescriptorSetLayout) { KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout"); if (!this->mDescriptorSetLayout) { @@ -110,43 +92,16 @@ Algorithm::destroy() (vk::Optional)nullptr); this->mDescriptorSetLayout = nullptr; } - - if (this->mFreeDescriptorPool && 
this->mDescriptorPool) { - KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Pool"); - if (!this->mDescriptorPool) { - KP_LOG_WARN("Kompute Algorithm Error requested to destroy " - "descriptor pool but it is null"); - } - this->mDevice->destroy( - *this->mDescriptorPool, - (vk::Optional)nullptr); - this->mDescriptorPool = nullptr; - } } void Algorithm::createParameters() { KP_LOG_DEBUG("Kompute Algorithm createParameters started"); - - std::vector descriptorPoolSizes = { - vk::DescriptorPoolSize( - vk::DescriptorType::eStorageBuffer, - static_cast(this->mTensors.size()) // Descriptor count - ) - }; - - vk::DescriptorPoolCreateInfo descriptorPoolInfo( - vk::DescriptorPoolCreateFlags(), - 1, // Max sets - static_cast(descriptorPoolSizes.size()), - descriptorPoolSizes.data()); - - KP_LOG_DEBUG("Kompute Algorithm creating descriptor pool"); - this->mDescriptorPool = std::make_shared(); - this->mDevice->createDescriptorPool( - &descriptorPoolInfo, nullptr, this->mDescriptorPool.get()); - this->mFreeDescriptorPool = true; + if (!this->mDescriptorPool || !*this->mDescriptorPool) { + KP_LOG_ERROR("Kompute Algorithm createParameters called with null descriptor pool"); + return; + } std::vector descriptorSetBindings; for (size_t i = 0; i < this->mTensors.size(); i++) { @@ -165,9 +120,15 @@ Algorithm::createParameters() KP_LOG_DEBUG("Kompute Algorithm creating descriptor set layout"); this->mDescriptorSetLayout = std::make_shared(); - this->mDevice->createDescriptorSetLayout( + vk::Result result = this->mDevice->createDescriptorSetLayout( &descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get()); - this->mFreeDescriptorSetLayout = true; + + if (result != vk::Result::eSuccess) { + KP_LOG_ERROR("Failed to create descriptor set layout. 
Error code: {}", vk::to_string(result)); + } else { + this->mFreeDescriptorSetLayout = true; + KP_LOG_DEBUG("Successfully allocated descriptor set layout."); + } vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo( *this->mDescriptorPool, @@ -176,8 +137,67 @@ KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets"); this->mDescriptorSet = std::make_shared(); - this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, + result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, this->mDescriptorSet.get()); + + if (result != vk::Result::eSuccess) { + KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result)); + } else { + this->mFreeDescriptorSet = true; + KP_LOG_DEBUG("Successfully allocated descriptor sets."); + } + + this->mFreeDescriptorSet = true; + + KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets"); + for (size_t i = 0; i < this->mTensors.size(); i++) { + std::vector computeWriteDescriptorSets; + + vk::DescriptorBufferInfo descriptorBufferInfo = + this->mTensors[i]->constructDescriptorBufferInfo(); + + computeWriteDescriptorSets.push_back( + vk::WriteDescriptorSet(*this->mDescriptorSet, + i, // Destination binding + 0, // Destination array element + 1, // Descriptor count + vk::DescriptorType::eStorageBuffer, + nullptr, // Descriptor image info + &descriptorBufferInfo)); + + this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, + nullptr); + } + + KP_LOG_DEBUG("Kompute Algorithm successfully run init"); +} + +void +Algorithm::updateParameters() +{ + KP_LOG_DEBUG("Kompute Algorithm updateParameters started"); + if (!this->mDescriptorPool || !*this->mDescriptorPool) { + KP_LOG_ERROR("Kompute Algorithm updateParameters called with null descriptor pool"); + return; + } + + vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo( + *this->mDescriptorPool, + 1, // Descriptor set layout count + this->mDescriptorSetLayout.get()); + + KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets"); + 
this->mDescriptorSet = std::make_shared(); + vk::Result result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo, + this->mDescriptorSet.get()); + + if (result != vk::Result::eSuccess) { + KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result)); + } else { + this->mFreeDescriptorSet = true; + KP_LOG_DEBUG("Successfully allocated descriptor sets."); + } + this->mFreeDescriptorSet = true; KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets"); @@ -284,16 +304,9 @@ Algorithm::createPipeline() vk::Pipeline(), 0); - vk::PipelineCacheCreateInfo pipelineCacheInfo = - vk::PipelineCacheCreateInfo(); - this->mPipelineCache = std::make_shared(); - this->mDevice->createPipelineCache( - &pipelineCacheInfo, nullptr, this->mPipelineCache.get()); - this->mFreePipelineCache = true; - #ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE vk::ResultValue pipelineResult = - this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo); + this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo); if (pipelineResult.result != vk::Result::eSuccess) { throw std::runtime_error("Failed to create pipeline result: " + @@ -305,7 +318,7 @@ Algorithm::createPipeline() this->mFreePipeline = true; #else vk::Pipeline pipeline = - this->mDevice->createComputePipeline(*this->mPipelineCache, pipelineInfo) + this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo) .value; this->mPipeline = std::make_shared(pipeline); this->mFreePipeline = true; @@ -367,7 +380,6 @@ Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer) void Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { - KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size"); // The dispatch size is set up based on either explicitly provided template @@ -400,4 +412,9 @@ Algorithm::getTensors() return this->mTensors; } +void Algorithm::setTensors(const std::vector>& tensors) +{ + this->mTensors = tensors; +} + } diff --git 
a/src/CMakeLists.txt b/src/CMakeLists.txt index dbb47dbe..72a82421 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,13 +8,16 @@ endif() cmake_minimum_required(VERSION 3.20) -add_library(kompute Algorithm.cpp +add_library(kompute STATIC Algorithm.cpp Manager.cpp OpAlgoDispatch.cpp OpMemoryBarrier.cpp OpTensorCopy.cpp + OpTensorFill.cpp OpTensorSyncDevice.cpp OpTensorSyncLocal.cpp + OpBufferSyncDevice.cpp + OpBufferSyncLocal.cpp Sequence.cpp Tensor.cpp Core.cpp) @@ -25,15 +28,16 @@ add_library(kompute::kompute ALIAS kompute) set_target_properties(kompute PROPERTIES VERSION ${${PROJECT_NAME}_VERSION} - SOVERSION ${${PROJECT_NAME}_VERSION_MAJOR}) + SOVERSION ${${PROJECT_NAME}_VERSION_MAJOR} + POSITION_INDEPENDENT_CODE TRUE) # Import GNU common install directory variables include(GNUInstallDirs) -install(TARGETS kompute - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) +#install(TARGETS kompute +# RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +# ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +# LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) # Include CMake helpers for package config files # Follow this installation guideline: https://cmake.org/cmake/help/latest/manual/cmake-packages.7.html @@ -43,8 +47,8 @@ configure_package_config_file(${PROJECT_SOURCE_DIR}/cmake/komputeConfig.cmake.in "${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute) -install(FILES ${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake - ${PROJECT_BINARY_DIR}/kompute/komputeConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute) +#install(FILES ${PROJECT_BINARY_DIR}/kompute/komputeConfig.cmake +# ${PROJECT_BINARY_DIR}/kompute/komputeConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/kompute) # #################################################### # Linking @@ -54,12 +58,12 @@ if(KOMPUTE_OPT_ANDROID_BUILD) android kp_logger 
kp_shader - fmt::fmt) + fmt::fmt-header-only) else() - target_link_libraries(kompute PUBLIC Vulkan::Vulkan + target_link_libraries(kompute PUBLIC kp_logger kp_shader - fmt::fmt) + fmt::fmt-header-only) endif() if(KOMPUTE_OPT_BUILD_PYTHON) @@ -70,6 +74,8 @@ endif() if(KOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER) target_link_libraries(kompute PUBLIC Vulkan-Headers) +else() + target_link_libraries(kompute PUBLIC Vulkan::Headers) endif() # #################################################### diff --git a/src/Core.cpp b/src/Core.cpp index 01c19ff3..020f4416 100644 --- a/src/Core.cpp +++ b/src/Core.cpp @@ -2,7 +2,6 @@ #include "kompute/Core.hpp" -#if VK_USE_PLATFORM_ANDROID_KHR #ifndef KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE #define KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE /** @@ -13,7 +12,6 @@ **/ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE #endif // !KOMPUTE_VK_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE -#endif // VK_USE_PLATFORM_ANDROID_KHR namespace kp { } // namespace kp diff --git a/src/Manager.cpp b/src/Manager.cpp index 3c3e1165..f45bd6f6 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -3,7 +3,10 @@ #include "kompute/Manager.hpp" #include "fmt/format.h" #include "kompute/logger/Logger.hpp" + #include +#include + #include #include #include @@ -33,13 +36,6 @@ debugMessageCallback(VkDebugReportFlagsEXT /*flags*/, #endif Manager::Manager() - : Manager(0) -{ -} - -Manager::Manager(uint32_t physicalDeviceIndex, - const std::vector& familyQueueIndices, - const std::vector& desiredExtensions) { this->mManageResources = true; @@ -47,32 +43,46 @@ Manager::Manager(uint32_t physicalDeviceIndex, #if !KOMPUTE_OPT_LOG_LEVEL_DISABLED logger::setupLogger(); #endif - this->createInstance(); +} + +void Manager::initializeDevice(uint32_t physicalDeviceIndex, + const std::vector& familyQueueIndices, + const std::vector& desiredExtensions) +{ this->createDevice( familyQueueIndices, physicalDeviceIndex, desiredExtensions); } -Manager::Manager(std::shared_ptr 
instance, - std::shared_ptr physicalDevice, - std::shared_ptr device) +Manager::~Manager() { - this->mManageResources = false; + KP_LOG_DEBUG("Kompute Manager Destructor started"); + + if (this->mInstance == nullptr) { + KP_LOG_ERROR( + "Kompute Manager destructor reached with null Instance pointer"); + return; + } - this->mInstance = instance; - this->mPhysicalDevice = physicalDevice; - this->mDevice = device; + if (this->mDevice) { + this->destroy(); + } -// Make sure the logger is setup -#if !KOMPUTE_OPT_LOG_LEVEL_DISABLED - logger::setupLogger(); +#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS + if (this->mDebugReportCallback) { + this->mInstance->destroyDebugReportCallbackEXT( + this->mDebugReportCallback, nullptr, this->mDebugDispatcher); + this->mDebugReportCallback = nullptr; + KP_LOG_DEBUG("Kompute Manager Destroyed Debug Report Callback"); + } #endif -} -Manager::~Manager() -{ - KP_LOG_DEBUG("Kompute Manager Destructor started"); - this->destroy(); + if (this->mFreeInstance) { + this->mInstance->destroy( + (vk::Optional)nullptr); + this->mInstance = nullptr; + KP_LOG_DEBUG("Kompute Manager Destroyed Instance"); + } } void @@ -98,15 +108,14 @@ Manager::destroy() this->mManagedSequences.clear(); } - if (this->mManageResources && this->mManagedAlgorithms.size()) { + if (this->mManageResources && !this->mManagedAlgorithmsMap.empty()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms"); - for (const std::weak_ptr& weakAlgorithm : - this->mManagedAlgorithms) { - if (std::shared_ptr algorithm = weakAlgorithm.lock()) { + for (const auto& kv : this->mManagedAlgorithmsMap) { + if (std::shared_ptr algorithm = kv.second) { algorithm->destroy(); } } - this->mManagedAlgorithms.clear(); + this->mManagedAlgorithmsMap.clear(); } if (this->mManageResources && this->mManagedTensors.size()) { @@ -119,33 +128,25 @@ Manager::destroy() this->mManagedTensors.clear(); } - if (this->mFreeDevice) { - KP_LOG_INFO("Destroying device"); + if (this->mPipelineCache) { + 
KP_LOG_DEBUG("Kompute Manager Destroying pipeline cache"); + if (!this->mPipelineCache) { + KP_LOG_WARN("Kompute Manager Error requested to destroy " + "pipeline cache but it is null"); + } this->mDevice->destroy( + *this->mPipelineCache, (vk::Optional)nullptr); - this->mDevice = nullptr; - KP_LOG_DEBUG("Kompute Manager Destroyed Device"); - } - - if (this->mInstance == nullptr) { - KP_LOG_ERROR( - "Kompute Manager destructor reached with null Instance pointer"); - return; + this->mPipelineCache = nullptr; } -#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS - if (this->mDebugReportCallback) { - this->mInstance->destroyDebugReportCallbackEXT( - this->mDebugReportCallback, nullptr, this->mDebugDispatcher); - KP_LOG_DEBUG("Kompute Manager Destroyed Debug Report Callback"); - } -#endif - - if (this->mFreeInstance) { - this->mInstance->destroy( + if (this->mFreeDevice && this->mDevice) { + KP_LOG_INFO("Destroying device"); + this->mComputeQueues.clear(); + this->mDevice->destroy( (vk::Optional)nullptr); - this->mInstance = nullptr; - KP_LOG_DEBUG("Kompute Manager Destroyed Instance"); + this->mDevice = nullptr; + KP_LOG_DEBUG("Kompute Manager Destroyed Device"); } } @@ -179,6 +180,16 @@ Manager::createInstance() applicationExtensions.data(); } + try { + mDynamicLoader = std::make_shared(); + } catch (const std::exception & err) { + return; + } + + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = + mDynamicLoader->getProcAddress("vkGetInstanceProcAddr"); + VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); + #ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS KP_LOG_DEBUG("Kompute Manager adding debug validation layers"); // We'll identify the layers that are supported @@ -233,20 +244,18 @@ Manager::createInstance() } #endif -#if VK_USE_PLATFORM_ANDROID_KHR - vk::DynamicLoader dl; - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = - dl.getProcAddress("vkGetInstanceProcAddr"); - VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); -#endif // VK_USE_PLATFORM_ANDROID_KHR - 
this->mInstance = std::make_shared(); - vk::createInstance( + vk::Result r = vk::createInstance( &computeInstanceCreateInfo, nullptr, this->mInstance.get()); + if (r != vk::Result::eSuccess) { + KP_LOG_ERROR( + "Kompute Manager Error allocating vulkan instance: {}", vk::to_string(r)); + this->mInstance = nullptr; + this->mFreeInstance = false; + return; + } -#if VK_USE_PLATFORM_ANDROID_KHR VULKAN_HPP_DEFAULT_DISPATCHER.init(*this->mInstance); -#endif // VK_USE_PLATFORM_ANDROID_KHR KP_LOG_DEBUG("Kompute Manager Instance Created"); @@ -261,7 +270,7 @@ (PFN_vkDebugReportCallbackEXT)debugMessageCallback; debugCreateInfo.flags = debugFlags; - this->mDebugDispatcher.init(*this->mInstance, &vkGetInstanceProcAddr); + this->mDebugDispatcher.init(*this->mInstance, vkGetInstanceProcAddr); this->mDebugReportCallback = this->mInstance->createDebugReportCallbackEXT( debugCreateInfo, nullptr, this->mDebugDispatcher); @@ -272,23 +281,39 @@ void Manager::clear() { - if (this->mManageResources) { + if (!this->mManageResources) { + return; + } + + auto getTotalObjs = [this]() { + return this->mManagedTensors.size() + this->mManagedAlgorithmsMap.size() + this->mManagedSequences.size(); + }; + size_t objTotal = getTotalObjs(); + while (objTotal > 0) { this->mManagedTensors.erase( std::remove_if(begin(this->mManagedTensors), end(this->mManagedTensors), [](std::weak_ptr t) { return t.expired(); }), end(this->mManagedTensors)); - this->mManagedAlgorithms.erase( - std::remove_if( - begin(this->mManagedAlgorithms), - end(this->mManagedAlgorithms), - [](std::weak_ptr t) { return t.expired(); }), - end(this->mManagedAlgorithms)); + for (auto it = this->mManagedAlgorithmsMap.begin(); + it != this->mManagedAlgorithmsMap.end();) { + if (it->second) { + it = this->mManagedAlgorithmsMap.erase(it); + } else { + ++it; + } + } this->mManagedSequences.erase( std::remove_if(begin(this->mManagedSequences), end(this->mManagedSequences), [](std::weak_ptr t)
{ return t.expired(); }), end(this->mManagedSequences)); + + size_t newTotal = getTotalObjs(); + if (newTotal == objTotal) { + break; + } + objTotal = newTotal; } } @@ -338,7 +363,7 @@ Manager::createDevice(const std::vector& familyQueueIndices, KP_LOG_INFO("Using physical device index {} found {}", physicalDeviceIndex, - physicalDeviceProperties.deviceName); + physicalDeviceProperties.deviceName.data()); if (familyQueueIndices.empty()) { // Find compute queue @@ -413,17 +438,44 @@ Manager::createDevice(const std::vector& familyQueueIndices, fmt::join(validExtensions, ", ")); } + vk::PhysicalDeviceFeatures2 supportedFeatures; + vk::PhysicalDeviceVulkan12Features supportedFeatures12; + supportedFeatures.pNext = &supportedFeatures12; + physicalDevice.getFeatures2(&supportedFeatures); + + vk::PhysicalDeviceFeatures features; + features.shaderInt16 = true; + + vk::PhysicalDeviceVulkan11Features features11; + features11.uniformAndStorageBuffer16BitAccess = true; + features11.storageBuffer16BitAccess = true; + features11.pNext = nullptr; + + vk::PhysicalDeviceVulkan12Features features12; + features12.storageBuffer8BitAccess = true; + features12.uniformAndStorageBuffer8BitAccess = true; + features12.shaderFloat16 = supportedFeatures12.shaderFloat16; + features12.shaderInt8 = true; + features12.pNext = &features11; + vk::DeviceCreateInfo deviceCreateInfo(vk::DeviceCreateFlags(), deviceQueueCreateInfos.size(), deviceQueueCreateInfos.data(), {}, {}, validExtensions.size(), - validExtensions.data()); + validExtensions.data(), + &features); + + deviceCreateInfo.pNext = &features12; this->mDevice = std::make_shared(); - physicalDevice.createDevice( + vk::Result r = physicalDevice.createDevice( &deviceCreateInfo, nullptr, this->mDevice.get()); + if (r != vk::Result::eSuccess) { + KP_LOG_ERROR("Kompute Manager could not create device"); + } + KP_LOG_DEBUG("Kompute Manager device created"); for (const uint32_t& familyQueueIndex : this->mComputeQueueFamilyIndices) { @@ -439,6 
+491,12 @@ Manager::createDevice(const std::vector& familyQueueIndices, } KP_LOG_DEBUG("Kompute Manager compute queue obtained"); + + mPipelineCache = std::make_shared(); + vk::PipelineCacheCreateInfo pipelineCacheInfo = + vk::PipelineCacheCreateInfo(); + this->mDevice->createPipelineCache( + &pipelineCacheInfo, nullptr, mPipelineCache.get()); } std::shared_ptr diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index a76fbd58..edc0f6eb 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -24,9 +24,9 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) this->mAlgorithm->getTensors()) { tensor->recordPrimaryBufferMemoryBarrier( commandBuffer, - vk::AccessFlagBits::eTransferWrite, + vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead, - vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader); } diff --git a/src/OpBufferSyncDevice.cpp b/src/OpBufferSyncDevice.cpp new file mode 100644 index 00000000..1812d04b --- /dev/null +++ b/src/OpBufferSyncDevice.cpp @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "kompute/operations/OpBufferSyncDevice.hpp" + +namespace kp { + +OpBufferSyncDevice::OpBufferSyncDevice( + vk::Buffer *primaryBuffer, + vk::Buffer *stagingBuffer, + vk::DeviceSize size) + : mPrimaryBuffer(primaryBuffer) + , mStagingBuffer(stagingBuffer) + , mSize(size) +{ + KP_LOG_DEBUG("Kompute OpBufferSyncDevice constructor with params"); +} + +OpBufferSyncDevice::~OpBufferSyncDevice() +{ + KP_LOG_DEBUG("Kompute OpBufferSyncDevice destructor started"); +} + +void +OpBufferSyncDevice::record(const vk::CommandBuffer& commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpBufferSyncDevice record called"); + vk::BufferCopy copyRegion(0, 0, mSize); + commandBuffer.copyBuffer(*mStagingBuffer, *mPrimaryBuffer, copyRegion); +} + +void +OpBufferSyncDevice::preEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute 
OpBufferSyncDevice preEval called"); +} + +void +OpBufferSyncDevice::postEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpBufferSyncDevice postEval called"); +} + +} diff --git a/src/OpBufferSyncLocal.cpp b/src/OpBufferSyncLocal.cpp new file mode 100644 index 00000000..a829819f --- /dev/null +++ b/src/OpBufferSyncLocal.cpp @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "kompute/operations/OpBufferSyncLocal.hpp" + +namespace kp { + +OpBufferSyncLocal::OpBufferSyncLocal( + vk::Buffer *primaryBuffer, + vk::Buffer *stagingBuffer, + vk::DeviceSize size) + : mPrimaryBuffer(primaryBuffer) + , mStagingBuffer(stagingBuffer) + , mSize(size) +{ + KP_LOG_DEBUG("Kompute OpBufferSyncLocal constructor with params"); +} + +OpBufferSyncLocal::~OpBufferSyncLocal() +{ + KP_LOG_DEBUG("Kompute OpBufferSyncLocal destructor started"); +} + +void +OpBufferSyncLocal::record(const vk::CommandBuffer& commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpBufferSyncLocal record called"); + vk::BufferCopy copyRegion(0, 0, mSize); + commandBuffer.copyBuffer(*mPrimaryBuffer, *mStagingBuffer, copyRegion); +} + +void +OpBufferSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpBufferSyncLocal preEval called"); +} + +void +OpBufferSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpBufferSyncLocal postEval called"); +} + +} diff --git a/src/OpTensorFill.cpp b/src/OpTensorFill.cpp new file mode 100644 index 00000000..bda7d604 --- /dev/null +++ b/src/OpTensorFill.cpp @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "kompute/operations/OpTensorFill.hpp" +#include "kompute/Tensor.hpp" + +namespace kp { + +OpTensorFill::OpTensorFill(const std::vector>& tensors) +{ + KP_LOG_DEBUG("Kompute OpTensorFill constructor with params"); + + if (tensors.size() < 1) { + throw std::runtime_error( + "Kompute OpTensorFill called with less than 1 tensor"); + } + + this->mTensors = 
tensors; +} + +OpTensorFill::~OpTensorFill() +{ + KP_LOG_DEBUG("Kompute OpTensorFill destructor started"); +} + +void +OpTensorFill::record(const vk::CommandBuffer& commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpTensorFill record called"); + + for (size_t i = 0; i < this->mTensors.size(); i++) { + this->mTensors[i]->recordFill(commandBuffer, 0); + } +} + +void +OpTensorFill::preEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpTensorFill preEval called"); +} + +void +OpTensorFill::postEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpTensorFill postEval called"); +} + +} diff --git a/src/Sequence.cpp b/src/Sequence.cpp index da3b379a..08cc0093 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -84,6 +84,7 @@ void Sequence::clear() { KP_LOG_DEBUG("Kompute Sequence calling clear"); + this->mOperations.clear(); if (this->isRecording()) { this->end(); } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index ad5cac9a..84dce08e 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -44,6 +44,11 @@ Tensor::Tensor(std::shared_ptr physicalDevice, uint32_t elementTotalCount, uint32_t elementMemorySize, const TensorDataTypes& dataType, + vk::DeviceMemory *primaryMemory, + vk::Buffer *primaryBuffer, + vk::DeviceMemory *stagingMemory, + vk::Buffer *stagingBuffer, + vk::DeviceSize offset, const TensorTypes& tensorType) { KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}", @@ -55,7 +60,7 @@ Tensor::Tensor(std::shared_ptr physicalDevice, this->mDataType = dataType; this->mTensorType = tensorType; - this->rebuild(data, elementTotalCount, elementMemorySize); + this->rebuild(data, elementTotalCount, elementMemorySize, primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, offset); } Tensor::~Tensor() @@ -71,14 +76,20 @@ Tensor::~Tensor() } void -Tensor::rebuild(void* data, +Tensor::rebuild(void* /*data*/, uint32_t elementTotalCount, - uint32_t elementMemorySize) + uint64_t memorySize, + vk::DeviceMemory 
*primaryMemory, + vk::Buffer *primaryBuffer, + vk::DeviceMemory *stagingMemory, + vk::Buffer *stagingBuffer, + vk::DeviceSize offset) { KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount); this->mSize = elementTotalCount; - this->mDataTypeMemorySize = elementMemorySize; + this->mMemorySize = memorySize; + this->mOffset = offset; if (this->mPrimaryBuffer || this->mPrimaryMemory) { KP_LOG_DEBUG( @@ -86,12 +97,7 @@ Tensor::rebuild(void* data, this->destroy(); } - this->allocateMemoryCreateGPUResources(); - - if (this->tensorType() != Tensor::TensorTypes::eStorage) { - this->mapRawData(); - memcpy(this->mRawData, data, this->memorySize()); - } + this->setGPUResources(primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, offset); } Tensor::TensorTypes @@ -113,16 +119,10 @@ Tensor::size() return this->mSize; } -uint32_t -Tensor::dataTypeMemorySize() -{ - return this->mDataTypeMemorySize; -} - -uint32_t +uint64_t Tensor::memorySize() { - return this->mSize * this->mDataTypeMemorySize; + return this->mMemorySize; } kp::Tensor::TensorDataTypes @@ -143,64 +143,13 @@ Tensor::setRawData(const void* data) memcpy(this->mRawData, data, this->memorySize()); } -void -Tensor::mapRawData() -{ - - KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN( - "Kompute Tensor mapping data not supported on {} tensor", toString(this->tensorType())); - return; - } - - vk::DeviceSize bufferSize = this->memorySize(); - - // Given we request coherent host memory we don't need to invalidate / - // flush - this->mRawData = this->mDevice->mapMemory( - *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); - -} - -void -Tensor::unmapRawData() -{ - - KP_LOG_DEBUG("Kompute Tensor mapping data from 
host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN( - "Kompute Tensor mapping data not supported on {} tensor", toString(this->tensorType())); - return; - } - - vk::DeviceSize bufferSize = this->memorySize(); - vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->flushMappedMemoryRanges(1, &mappedRange); - this->mDevice->unmapMemory(*hostVisibleMemory); -} - void Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, std::shared_ptr copyFromTensor) { vk::DeviceSize bufferSize(this->memorySize()); - vk::BufferCopy copyRegion(0, 0, bufferSize); + vk::BufferCopy copyRegion(mOffset, mOffset, bufferSize); KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize); @@ -214,8 +163,11 @@ Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, void Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer) { + if (!this->mStagingBuffer) + return; + vk::DeviceSize bufferSize(this->memorySize()); - vk::BufferCopy copyRegion(0, 0, bufferSize); + vk::BufferCopy copyRegion(mOffset, mOffset, bufferSize); KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize); @@ -229,8 +181,11 @@ Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer) void Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer) { + if (!this->mStagingBuffer) + return; + vk::DeviceSize bufferSize(this->memorySize()); - vk::BufferCopy copyRegion(0, 0, bufferSize); + vk::BufferCopy copyRegion(mOffset, mOffset, bufferSize); KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize); @@ -243,8 +198,8 @@ Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer) void Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer, 
- std::shared_ptr bufferFrom, - std::shared_ptr bufferTo, + vk::Buffer *bufferFrom, + vk::Buffer *bufferTo, vk::DeviceSize /*bufferSize*/, vk::BufferCopy copyRegion) { @@ -252,6 +207,13 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer, commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion); } +void +Tensor::recordFill(const vk::CommandBuffer &commandBuffer, + uint32_t fill) +{ + commandBuffer.fillBuffer(*this->mPrimaryBuffer, mOffset, this->memorySize(), fill); +} + void Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::AccessFlagBits srcAccessMask, @@ -276,6 +238,9 @@ Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::PipelineStageFlagBits srcStageMask, vk::PipelineStageFlagBits dstStageMask) { + if (!this->mStagingBuffer) + return; + KP_LOG_DEBUG("Kompute Tensor recording STAGING buffer memory barrier"); this->recordBufferMemoryBarrier(commandBuffer, @@ -321,7 +286,7 @@ Tensor::constructDescriptorBufferInfo() this->memorySize()); vk::DeviceSize bufferSize = this->memorySize(); return vk::DescriptorBufferInfo(*this->mPrimaryBuffer, - 0, // offset + mOffset, // offset bufferSize); } @@ -393,7 +358,11 @@ Tensor::getStagingMemoryPropertyFlags() } void -Tensor::allocateMemoryCreateGPUResources() +Tensor::setGPUResources(vk::DeviceMemory *primaryMemory, + vk::Buffer *primaryBuffer, + vk::DeviceMemory *stagingMemory, + vk::Buffer *stagingBuffer, + vk::DeviceSize /*offset*/) { KP_LOG_DEBUG("Kompute Tensor creating buffer"); @@ -406,104 +375,19 @@ Tensor::allocateMemoryCreateGPUResources() KP_LOG_DEBUG("Kompute Tensor creating primary buffer and memory"); - this->mPrimaryBuffer = std::make_shared(); - this->createBuffer(this->mPrimaryBuffer, - this->getPrimaryBufferUsageFlags()); - this->mFreePrimaryBuffer = true; - this->mPrimaryMemory = std::make_shared(); - this->allocateBindMemory(this->mPrimaryBuffer, - this->mPrimaryMemory, - this->getPrimaryMemoryPropertyFlags()); - 
this->mFreePrimaryMemory = true; + this->mPrimaryBuffer = primaryBuffer; + this->mPrimaryMemory = primaryMemory; if (this->mTensorType == TensorTypes::eDevice) { KP_LOG_DEBUG("Kompute Tensor creating staging buffer and memory"); - this->mStagingBuffer = std::make_shared(); - this->createBuffer(this->mStagingBuffer, - this->getStagingBufferUsageFlags()); - this->mFreeStagingBuffer = true; - this->mStagingMemory = std::make_shared(); - this->allocateBindMemory(this->mStagingBuffer, - this->mStagingMemory, - this->getStagingMemoryPropertyFlags()); - this->mFreeStagingMemory = true; + this->mStagingBuffer = stagingBuffer; + this->mStagingMemory = stagingMemory; } KP_LOG_DEBUG("Kompute Tensor buffer & memory creation successful"); } -void -Tensor::createBuffer(std::shared_ptr buffer, - vk::BufferUsageFlags bufferUsageFlags) -{ - - vk::DeviceSize bufferSize = this->memorySize(); - - if (bufferSize < 1) { - throw std::runtime_error( - "Kompute Tensor attempted to create a zero-sized buffer"); - } - - KP_LOG_DEBUG("Kompute Tensor creating buffer with memory size: {}, and " - "usage flags: {}", - bufferSize, - vk::to_string(bufferUsageFlags)); - - // TODO: Explore having concurrent sharing mode (with option) - vk::BufferCreateInfo bufferInfo(vk::BufferCreateFlags(), - bufferSize, - bufferUsageFlags, - vk::SharingMode::eExclusive); - - this->mDevice->createBuffer(&bufferInfo, nullptr, buffer.get()); -} - -void -Tensor::allocateBindMemory(std::shared_ptr buffer, - std::shared_ptr memory, - vk::MemoryPropertyFlags memoryPropertyFlags) -{ - - KP_LOG_DEBUG("Kompute Tensor allocating and binding memory"); - - vk::PhysicalDeviceMemoryProperties memoryProperties = - this->mPhysicalDevice->getMemoryProperties(); - - vk::MemoryRequirements memoryRequirements = - this->mDevice->getBufferMemoryRequirements(*buffer); - - uint32_t memoryTypeIndex = -1; - bool memoryTypeIndexFound = false; - for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) { - if 
(memoryRequirements.memoryTypeBits & (1 << i)) { - if (((memoryProperties.memoryTypes[i]).propertyFlags & - memoryPropertyFlags) == memoryPropertyFlags) { - memoryTypeIndex = i; - memoryTypeIndexFound = true; - break; - } - } - } - if (!memoryTypeIndexFound) { - throw std::runtime_error( - "Memory type index for buffer creation not found"); - } - - KP_LOG_DEBUG( - "Kompute Tensor allocating memory index: {}, size {}, flags: {}", - memoryTypeIndex, - memoryRequirements.size, - vk::to_string(memoryPropertyFlags)); - - vk::MemoryAllocateInfo memoryAllocateInfo(memoryRequirements.size, - memoryTypeIndex); - - this->mDevice->allocateMemory(&memoryAllocateInfo, nullptr, memory.get()); - - this->mDevice->bindBufferMemory(*buffer, *memory, 0); -} - void Tensor::destroy() { @@ -513,7 +397,7 @@ Tensor::destroy() // invalidate Tensor this->mRawData = nullptr; this->mSize = 0; - this->mDataTypeMemorySize = 0; + this->mMemorySize = 0; if (!this->mDevice) { KP_LOG_WARN( @@ -521,67 +405,6 @@ Tensor::destroy() return; } - // Unmap the current memory data - if (this->tensorType() != Tensor::TensorTypes::eStorage) { - this->unmapRawData(); - } - - if (this->mFreePrimaryBuffer) { - if (!this->mPrimaryBuffer) { - KP_LOG_WARN("Kompose Tensor expected to destroy primary buffer " - "but got null buffer"); - } else { - KP_LOG_DEBUG("Kompose Tensor destroying primary buffer"); - this->mDevice->destroy( - *this->mPrimaryBuffer, - (vk::Optional)nullptr); - this->mPrimaryBuffer = nullptr; - this->mFreePrimaryBuffer = false; - } - } - - if (this->mFreeStagingBuffer) { - if (!this->mStagingBuffer) { - KP_LOG_WARN("Kompose Tensor expected to destroy staging buffer " - "but got null buffer"); - } else { - KP_LOG_DEBUG("Kompose Tensor destroying staging buffer"); - this->mDevice->destroy( - *this->mStagingBuffer, - (vk::Optional)nullptr); - this->mStagingBuffer = nullptr; - this->mFreeStagingBuffer = false; - } - } - - if (this->mFreePrimaryMemory) { - if (!this->mPrimaryMemory) { - 
KP_LOG_WARN("Kompose Tensor expected to free primary memory but " - "got null memory"); - } else { - KP_LOG_DEBUG("Kompose Tensor freeing primary memory"); - this->mDevice->freeMemory( - *this->mPrimaryMemory, - (vk::Optional)nullptr); - this->mPrimaryMemory = nullptr; - this->mFreePrimaryMemory = false; - } - } - - if (this->mFreeStagingMemory) { - if (!this->mStagingMemory) { - KP_LOG_WARN("Kompose Tensor expected to free staging memory but " - "got null memory"); - } else { - KP_LOG_DEBUG("Kompose Tensor freeing staging memory"); - this->mDevice->freeMemory( - *this->mStagingMemory, - (vk::Optional)nullptr); - this->mStagingMemory = nullptr; - this->mFreeStagingMemory = false; - } - } - if (this->mDevice) { this->mDevice = nullptr; } diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt index e1652fdd..53e9d8ae 100644 --- a/src/include/CMakeLists.txt +++ b/src/include/CMakeLists.txt @@ -21,13 +21,16 @@ target_sources(kompute PRIVATE kompute/operations/OpMemoryBarrier.hpp kompute/operations/OpMult.hpp kompute/operations/OpTensorCopy.hpp + kompute/operations/OpTensorFill.hpp kompute/operations/OpTensorSyncDevice.hpp kompute/operations/OpTensorSyncLocal.hpp + kompute/operations/OpBufferSyncDevice.hpp + kompute/operations/OpBufferSyncLocal.hpp kompute/logger/Logger.hpp ) -install(DIRECTORY kompute DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +#install(DIRECTORY kompute DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) # #################################################### # Logger @@ -41,4 +44,4 @@ target_sources(kp_logger PRIVATE kompute/logger/Logger.hpp ) -install(DIRECTORY logger DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) \ No newline at end of file +#install(DIRECTORY logger DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) \ No newline at end of file diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 1917dd37..e5fef1f5 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -36,6 +36,8 @@ class 
Algorithm */ template Algorithm(std::shared_ptr device, + vk::PipelineCache *pipelineCache, + vk::DescriptorPool *pool, const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, @@ -45,6 +47,8 @@ class Algorithm KP_LOG_DEBUG("Kompute Algorithm Constructor with device"); this->mDevice = device; + this->mPipelineCache = pipelineCache; + this->mDescriptorPool = pool; if (tensors.size() && spirv.size()) { KP_LOG_INFO( @@ -197,10 +201,18 @@ class Algorithm { uint32_t memorySize = sizeof(decltype(pushConstants.back())); uint32_t size = pushConstants.size(); - this->setPushConstants(pushConstants.data(), size, memorySize); } + void updateDescriptors(vk::DescriptorPool *pool) + { + this->mDescriptorPool = pool; + this->setWorkgroup( + this->mWorkgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1); + + this->updateParameters(); // TODO: See if we can reduce this + } + /** * Sets the push constants to the new value provided to use in the next * bindPush() with the raw memory block location and memory size to be used. @@ -210,7 +222,7 @@ class Algorithm * @param size The number of data elements provided in the data * @param memorySize The memory size of each of the data elements in bytes. */ - void setPushConstants(void* data, uint32_t size, uint32_t memorySize) + void setPushConstants(const void* data, uint32_t size, uint32_t memorySize) { uint32_t totalSize = memorySize * size; @@ -272,6 +284,7 @@ class Algorithm * @returns The list of tensors used in the algorithm. 
*/ const std::vector>& getTensors(); + void setTensors(const std::vector>& tensors); void destroy(); @@ -283,16 +296,14 @@ class Algorithm // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mDescriptorSetLayout; bool mFreeDescriptorSetLayout = false; - std::shared_ptr mDescriptorPool; - bool mFreeDescriptorPool = false; + vk::DescriptorPool *mDescriptorPool = nullptr; std::shared_ptr mDescriptorSet; bool mFreeDescriptorSet = false; std::shared_ptr mShaderModule; bool mFreeShaderModule = false; std::shared_ptr mPipelineLayout; bool mFreePipelineLayout = false; - std::shared_ptr mPipelineCache; - bool mFreePipelineCache = false; + vk::PipelineCache *mPipelineCache = nullptr; std::shared_ptr mPipeline; bool mFreePipeline = false; @@ -311,7 +322,9 @@ class Algorithm void createPipeline(); // Parameters + void freeParameters(); void createParameters(); + void updateParameters(); }; } // End namespace kp diff --git a/src/include/kompute/Core.hpp b/src/include/kompute/Core.hpp index 2384e47b..406e6b5d 100644 --- a/src/include/kompute/Core.hpp +++ b/src/include/kompute/Core.hpp @@ -15,7 +15,7 @@ typedef std::vector Constants; #define KOMPUTE_VK_API_MAJOR_VERSION 1 #endif // KOMPUTE_VK_API_MAJOR_VERSION #ifndef KOMPUTE_VK_API_MINOR_VERSION -#define KOMPUTE_VK_API_MINOR_VERSION 1 +#define KOMPUTE_VK_API_MINOR_VERSION 2 #endif // KOMPUTE_VK_API_MINOR_VERSION #define KOMPUTE_VK_API_VERSION \ VK_MAKE_VERSION( \ diff --git a/src/include/kompute/Kompute.hpp b/src/include/kompute/Kompute.hpp index e54adc1b..70e0dd43 100644 --- a/src/include/kompute/Kompute.hpp +++ b/src/include/kompute/Kompute.hpp @@ -13,6 +13,9 @@ #include "operations/OpTensorCopy.hpp" #include "operations/OpTensorSyncDevice.hpp" #include "operations/OpTensorSyncLocal.hpp" +#include "operations/OpBufferSyncDevice.hpp" +#include "operations/OpBufferSyncLocal.hpp" +#include "operations/OpTensorFill.hpp" // Will be build by CMake and placed inside the build directory #include "ShaderLogisticRegression.hpp" 
diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 52f9ada7..780c352e 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -20,14 +20,30 @@ class Manager { public: /** - Base constructor and default used which creates the base resources - including choosing the device 0 by default. + Base constructor. */ Manager(); /** - * Similar to base constructor but allows for further configuration to use - * when creating the Vulkan resources. + * Manager destructor which would ensure all owned resources are destroyed + * unless explicitly stated that resources should not be destroyed or freed. + */ + ~Manager(); + + bool hasInstance() const { + return this->mInstance.get(); + } + + bool hasDevice() const { + return this->mDevice.get(); + } + + bool hasVulkan() const { + return this->mDynamicLoader.get(); + } + + /** + * Initialize a device. * * @param physicalDeviceIndex The index of the physical device to use * @param familyQueueIndices (Optional) List of queue indices to add for @@ -35,29 +51,10 @@ class Manager * @param desiredExtensions The desired extensions to load from * physicalDevice */ - Manager(uint32_t physicalDeviceIndex, + void initializeDevice(uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices = {}, const std::vector& desiredExtensions = {}); - /** - * Manager constructor which allows your own vulkan application to integrate - * with the kompute use. 
- * - * @param instance Vulkan compute instance to base this application - * @param physicalDevice Vulkan physical device to use for application - * @param device Vulkan logical device to use for all base resources - * @param physicalDeviceIndex Index for vulkan physical device used - */ - Manager(std::shared_ptr instance, - std::shared_ptr physicalDevice, - std::shared_ptr device); - - /** - * Manager destructor which would ensure all owned resources are destroyed - * unless explicitly stated that resources should not be destroyed or freed. - */ - ~Manager(); - /** * Create a managed sequence that will be destroyed by this manager * if it hasn't been destroyed by its reference count going to zero. @@ -81,12 +78,16 @@ class Manager template std::shared_ptr> tensorT( const std::vector& data, + vk::DeviceMemory *primaryMemory, + vk::Buffer *primaryBuffer, + vk::DeviceMemory *stagingMemory, + vk::Buffer *stagingBuffer, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); std::shared_ptr> tensor{ new kp::TensorT( - this->mPhysicalDevice, this->mDevice, data, tensorType) }; + this->mPhysicalDevice, this->mDevice, data, primaryMemory, primaryBuffer, stagingMemory, stagingBuffer, tensorType) }; if (this->mManageResources) { this->mManagedTensors.push_back(tensor); @@ -95,26 +96,29 @@ class Manager return tensor; } - std::shared_ptr> tensor( - const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) - { - return this->tensorT(data, tensorType); - } - std::shared_ptr tensor( void* data, uint32_t elementTotalCount, - uint32_t elementMemorySize, + uint64_t memorySize, const Tensor::TensorDataTypes& dataType, + vk::DeviceMemory *primaryMemory, + vk::Buffer *primaryBuffer, + vk::DeviceMemory *stagingMemory, + vk::Buffer *stagingBuffer, + vk::DeviceSize offset, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { std::shared_ptr tensor{ new 
kp::Tensor(this->mPhysicalDevice, this->mDevice, data, elementTotalCount, - elementMemorySize, + memorySize, dataType, + primaryMemory, + primaryBuffer, + stagingMemory, + stagingBuffer, + offset, tensorType) }; if (this->mManageResources) { @@ -140,6 +144,8 @@ class Manager * @returns Shared pointer with initialised algorithm */ std::shared_ptr algorithm( + const std::string &name, + vk::DescriptorPool *pool, const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, @@ -147,7 +153,7 @@ class Manager const std::vector& pushConstants = {}) { return this->algorithm<>( - tensors, spirv, workgroup, specializationConstants, pushConstants); + name, pool, tensors, spirv, workgroup, specializationConstants, pushConstants); } /** @@ -166,6 +172,8 @@ class Manager */ template std::shared_ptr algorithm( + const std::string &name, + vk::DescriptorPool *pool, const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup, @@ -177,6 +185,8 @@ class Manager std::shared_ptr algorithm{ new kp::Algorithm( this->mDevice, + mPipelineCache.get(), + pool, tensors, spirv, workgroup, @@ -184,12 +194,24 @@ class Manager pushConstants) }; if (this->mManageResources) { - this->mManagedAlgorithms.push_back(algorithm); + this->mManagedAlgorithmsMap.insert({name, algorithm}); } return algorithm; } + bool hasAlgorithm(const std::string &name) const { + return mManagedAlgorithmsMap.find(name) != mManagedAlgorithmsMap.end(); + } + + std::shared_ptr getAlgorithm(const std::string &name) const { + auto it = mManagedAlgorithmsMap.find(name); + if (it != mManagedAlgorithmsMap.end()) { + return it->second; + } + return nullptr; + } + /** * Destroy the GPU resources and all managed resources by manager. 
**/ @@ -223,21 +245,27 @@ class Manager **/ std::shared_ptr getVkInstance() const; + std::shared_ptr device() const { return mDevice; } + std::shared_ptr physicalDevice() const { return mPhysicalDevice; } + std::shared_ptr pipelineCache() const { return mPipelineCache; } + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; bool mFreeInstance = false; std::shared_ptr mPhysicalDevice = nullptr; std::shared_ptr mDevice = nullptr; + std::shared_ptr mDynamicLoader = nullptr; bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES std::vector> mManagedTensors; std::vector> mManagedSequences; - std::vector> mManagedAlgorithms; + std::unordered_map> mManagedAlgorithmsMap; std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; + std::shared_ptr mPipelineCache; bool mManageResources = false; @@ -249,7 +277,7 @@ class Manager // Create functions void createInstance(); void createDevice(const std::vector& familyQueueIndices = {}, - uint32_t hysicalDeviceIndex = 0, + uint32_t physicalDeviceIndex = 0, const std::vector& desiredExtensions = {}); }; diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index de9b9f69..3b29a6e2 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -120,6 +120,17 @@ class Sequence : public std::enable_shared_from_this std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->eval(op); } + + template + std::shared_ptr eval(vk::Buffer *primaryBuffer, + vk::Buffer *stagingBuffer, + vk::DeviceSize size, + TArgs&&... params) + { + std::shared_ptr op{ new T(primaryBuffer, stagingBuffer, size, std::forward(params)...) }; + return this->eval(op); + } + /** * Eval sends all the recorded and stored operations in the vector of * operations into the gpu as a submit job with a barrier. 
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index a2bcd187..20939093 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -57,8 +57,13 @@ class Tensor std::shared_ptr device, void* data, uint32_t elementTotalCount, - uint32_t elementMemorySize, + uint32_t memorySize, const TensorDataTypes& dataType, + vk::DeviceMemory *primaryMemory, + vk::Buffer *primaryBuffer, + vk::DeviceMemory *stagingMemory, + vk::Buffer *stagingBuffer, + vk::DeviceSize offset, const TensorTypes& tensorType = TensorTypes::eDevice); /** @@ -76,7 +81,12 @@ class Tensor */ void rebuild(void* data, uint32_t elementTotalCount, - uint32_t elementMemorySize); + uint64_t memorySize, + vk::DeviceMemory *primaryMemory, + vk::Buffer *primaryBuffer, + vk::DeviceMemory *stagingMemory, + vk::Buffer *stagingBuffer, + vk::DeviceSize offset); /** * Destroys and frees the GPU resources which include the buffer and memory. @@ -108,6 +118,9 @@ class Tensor void recordCopyFrom(const vk::CommandBuffer& commandBuffer, std::shared_ptr copyFromTensor); + void recordFill(const vk::CommandBuffer &commandBuffer, + uint32_t fill); + /** * Records a copy from the internal staging memory to the device memory * using an optional barrier to wait for the operation. This function would @@ -178,23 +191,12 @@ class Tensor */ uint32_t size(); - /** - * Returns the total size of a single element of the respective data type - * that this tensor holds. - * - * @return Unsigned integer representing the memory of a single element of - * the respective data type. - */ - uint32_t dataTypeMemorySize(); - /** * Returns the total memory size of the data contained by the Tensor object - * which would equate to (this->size() * this->dataTypeMemorySize()) * - * @return Unsigned integer representing the memory of a single element of - * the respective data type. + * @return Unsigned integer representing the memory of the tensor in bytes. 
*/ - uint32_t memorySize(); + uint64_t memorySize(); /** * Retrieve the data type of the tensor (host, device, storage) @@ -248,36 +250,31 @@ class Tensor // -------------- ALWAYS OWNED RESOURCES TensorTypes mTensorType; TensorDataTypes mDataType; - uint32_t mSize; - uint32_t mDataTypeMemorySize; - void* mRawData; + uint32_t mSize = 0; + uint64_t mMemorySize = 0; + vk::DeviceSize mOffset = 0; + void* mRawData = nullptr; private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mPhysicalDevice; std::shared_ptr mDevice; - - // -------------- OPTIONALLY OWNED RESOURCES - std::shared_ptr mPrimaryBuffer; - bool mFreePrimaryBuffer = false; - std::shared_ptr mStagingBuffer; - bool mFreeStagingBuffer = false; - std::shared_ptr mPrimaryMemory; - bool mFreePrimaryMemory = false; - std::shared_ptr mStagingMemory; - bool mFreeStagingMemory = false; - - void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer - void createBuffer(std::shared_ptr buffer, - vk::BufferUsageFlags bufferUsageFlags); - void allocateBindMemory(std::shared_ptr buffer, - std::shared_ptr memory, - vk::MemoryPropertyFlags memoryPropertyFlags); + vk::Buffer *mPrimaryBuffer = nullptr; + vk::Buffer *mStagingBuffer = nullptr; + vk::DeviceMemory *mPrimaryMemory = nullptr; + vk::DeviceMemory *mStagingMemory = nullptr; + + void setGPUResources(vk::DeviceMemory *primaryMemory, + vk::Buffer *primaryBuffer, + vk::DeviceMemory *stagingMemory, + vk::Buffer *stagingBuffer, + vk::DeviceSize offset); void recordCopyBuffer(const vk::CommandBuffer& commandBuffer, - std::shared_ptr bufferFrom, - std::shared_ptr bufferTo, + vk::Buffer *bufferFrom, + vk::Buffer *bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion); + void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, const vk::Buffer& buffer, vk::AccessFlagBits srcAccessMask, @@ -290,9 +287,6 @@ class Tensor vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags(); vk::BufferUsageFlags getStagingBufferUsageFlags(); 
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); - - void mapRawData(); - void unmapRawData(); }; template @@ -300,47 +294,8 @@ class TensorT : public Tensor { public: - TensorT(std::shared_ptr physicalDevice, - std::shared_ptr device, - const std::vector& data, - const TensorTypes& tensorType = TensorTypes::eDevice) - : Tensor(physicalDevice, - device, - (void*)data.data(), - data.size(), - sizeof(T), - this->dataType(), - tensorType) - { - KP_LOG_DEBUG("Kompute TensorT constructor with data size {}", - data.size()); - } - ~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); } - T* data() { return (T*)this->mRawData; } - - std::vector vector() - { - return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; - } - - T& operator[](int index) { return *(((T*)this->mRawData) + index); } - - void setData(const std::vector& data) - { - - KP_LOG_DEBUG("Kompute TensorT setting data with data size {}", - data.size()); - - if (data.size() != this->mSize) { - throw std::runtime_error( - "Kompute TensorT Cannot set data of different sizes"); - } - - Tensor::setRawData(data.data()); - } - TensorDataTypes dataType(); }; diff --git a/src/include/kompute/operations/OpBufferSyncDevice.hpp b/src/include/kompute/operations/OpBufferSyncDevice.hpp new file mode 100644 index 00000000..50d8e970 --- /dev/null +++ b/src/include/kompute/operations/OpBufferSyncDevice.hpp @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "kompute/operations/OpBase.hpp" + +namespace kp { + +class OpBufferSyncDevice : public OpBase +{ + public: + OpBufferSyncDevice( + vk::Buffer *primaryBuffer, + vk::Buffer *stagingBuffer, + vk::DeviceSize size); + + /** + * Default destructor. This class does not manage memory so it won't be + * expecting the parent to perform a release. + */ + ~OpBufferSyncDevice() override; + + /** + * For device buffers, it records the copy command for the buffer to copy + * the data from its staging to device memory. 
+ * + * @param commandBuffer The command buffer to record the command into. + */ + void record(const vk::CommandBuffer& commandBuffer) override; + + /** + * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. + */ + virtual void preEval(const vk::CommandBuffer& commandBuffer) override; + + /** + * Does not perform any postEval commands. + * + * @param commandBuffer The command buffer to record the command into. + */ + virtual void postEval(const vk::CommandBuffer& commandBuffer) override; + + private: + vk::Buffer *mPrimaryBuffer; + vk::Buffer *mStagingBuffer; + vk::DeviceSize mSize; +}; + +} // End namespace kp diff --git a/src/include/kompute/operations/OpBufferSyncLocal.hpp b/src/include/kompute/operations/OpBufferSyncLocal.hpp new file mode 100644 index 00000000..7db99719 --- /dev/null +++ b/src/include/kompute/operations/OpBufferSyncLocal.hpp @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "kompute/operations/OpBase.hpp" + +namespace kp { + +class OpBufferSyncLocal : public OpBase +{ + public: + OpBufferSyncLocal( + vk::Buffer *primaryBuffer, + vk::Buffer *stagingBuffer, + vk::DeviceSize size); + + /** + * Default destructor. This class does not manage memory so it won't be + * expecting the parent to perform a release. + */ + ~OpBufferSyncLocal() override; + + /** + * For local buffers, it records the copy command for the buffer to copy + * the data from its device (primary) memory to its staging memory. + * + * @param commandBuffer The command buffer to record the command into. 
+ */ + virtual void postEval(const vk::CommandBuffer& commandBuffer) override; + + private: + vk::Buffer *mPrimaryBuffer; + vk::Buffer *mStagingBuffer; + vk::DeviceSize mSize; +}; + +} // End namespace kp diff --git a/src/include/kompute/operations/OpTensorFill.hpp b/src/include/kompute/operations/OpTensorFill.hpp new file mode 100644 index 00000000..9a6bf131 --- /dev/null +++ b/src/include/kompute/operations/OpTensorFill.hpp @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "kompute/Core.hpp" + +#include "kompute/Tensor.hpp" + +#include "kompute/operations/OpBase.hpp" + +namespace kp { + +/** + * Operation that fills the tensor + */ +class OpTensorFill : public OpBase +{ + public: + /** + * Default constructor with parameters that provides the core vulkan + * resources and the tensors that will be used in the operation. + * + * @param tensors Tensors that will be used to create in operation. + */ + OpTensorFill(const std::vector>& tensors); + + /** + * Default destructor. This class does not manage memory so it won't be + * expecting the parent to perform a release. + */ + ~OpTensorFill() override; + + /** + * Records the fill command for tensor. + * + * @param commandBuffer The command buffer to record the command into. + */ + void record(const vk::CommandBuffer& commandBuffer) override; + + /** + * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. + */ + virtual void preEval(const vk::CommandBuffer& commandBuffer) override; + + /** + * Does not perform any postEval commands. + * + * @param commandBuffer The command buffer to record the command into. 
+ */ + virtual void postEval(const vk::CommandBuffer& commandBuffer) override; + + private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mTensors; +}; + +} // End namespace kp diff --git a/src/logger/CMakeLists.txt b/src/logger/CMakeLists.txt index 1dcc1e6b..1f8695ac 100644 --- a/src/logger/CMakeLists.txt +++ b/src/logger/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20) set(LOGGER_SOURCES Logger.cpp) -add_library(kp_logger ${LOGGER_SOURCES}) +add_library(kp_logger STATIC ${LOGGER_SOURCES}) # Define log levels in code add_compile_definitions(KOMPUTE_LOG_LEVEL_TRACE=0) diff --git a/src/shaders/glsl/CMakeLists.txt b/src/shaders/glsl/CMakeLists.txt index 3101a2b1..78bdf5ab 100644 --- a/src/shaders/glsl/CMakeLists.txt +++ b/src/shaders/glsl/CMakeLists.txt @@ -22,5 +22,5 @@ add_library(kp_shader INTERFACE "${CMAKE_CURRENT_BINARY_DIR}/ShaderOpMult.hpp" target_include_directories(kp_shader INTERFACE $) # Make sure we install shaders: -install(FILES $/ShaderOpMult.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -install(FILES $/ShaderLogisticRegression.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +#install(FILES $/ShaderOpMult.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +#install(FILES $/ShaderLogisticRegression.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})