diff --git a/.gitmodules b/.gitmodules index 4a30702..ecd99a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "source_third_party/khronos/vulkan-utilities"] path = source_third_party/khronos/vulkan-utilities url = https://github.com/KhronosGroup/Vulkan-Utility-Libraries/ +[submodule "source_third_party/libGPUCounters"] + path = source_third_party/libGPUCounters + url = https://github.com/ARM-software/libGPUCounters.git diff --git a/layer_gpu_profile/CMakeLists.txt b/layer_gpu_profile/CMakeLists.txt new file mode 100644 index 0000000..e2d2bed --- /dev/null +++ b/layer_gpu_profile/CMakeLists.txt @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ----------------------------------------------------------------------------- + +cmake_minimum_required(VERSION 3.19) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +project(VkLayerGPUProfile VERSION 1.0.0) + +# Common configuration +set(LGL_LOG_TAG "VkLayerGPUProfile") +set(LGL_CONFIG_TRACE 0) +set(LGL_CONFIG_LOG 1) + +include(../source_common/compiler_helper.cmake) +include(../cmake/clang-tools.cmake) + +# Build steps +add_subdirectory(../source_third_party/libGPUCounters source_third_party/libGPUCounters) + +add_subdirectory(../source_common/comms source_common/comms) +add_subdirectory(../source_common/framework source_common/framework) +add_subdirectory(../source_common/trackers source_common/trackers) + +add_subdirectory(source) diff --git a/layer_gpu_profile/README_LAYER.md b/layer_gpu_profile/README_LAYER.md new file mode 100644 index 0000000..b78f6cc --- /dev/null +++ b/layer_gpu_profile/README_LAYER.md @@ -0,0 +1,134 @@ +# Layer: GPU Profile + +This layer is a frame profiler that can capture per workload performance +counters for selected frames running on an Arm GPU. + +## What devices are supported? + +This layer requires Vulkan 1.0 and an Arm GPU because it uses an Arm-specific +counter sampling library. + +## What data can be collected? + +The layer serializes workloads for instrumented frames and injects counter +samples between them, allowing the layer to measure the hardware cost of +render passes, compute dispatches, transfers, etc. + +The serialization is very invasive to wall-clock performance, due to removal +of pipeline overlap between workloads and additional GPU idle time waiting for +the layer to perform each performance counter sampling operation. This will +have an impact on the counter data being captured! 
+ +Derived counters that show queue and functional unit utilization as a +percentage of the overall "active" time of their parent block will report low +because of time spent refilling and then draining the GPU pipeline between +workloads. The overall _GPU Active Cycles_ counter is known to be unreliable, +because the serialization means that command stream setup and teardown costs +are not hidden in the shadow of surrounding work. We recommend using the +individual queue active cycles counters as the main measure of performance. + +Note that any counter that measures direct work, such as architectural issue +cycles, or workload nouns, such as primitives or threads, should be unaffected +by the loss of pipelining. + +Arm GPUs provide a wide range of performance counters covering many different +aspects of hardware performance. The layer will collect a standard set of +counters by default but, with source modification, can collect any of the +hardware counters and derived expressions supported by the +[libGPUCounters][LGC] library that Arm provides on GitHub. + +[LGC]: https://github.com/ARM-software/libGPUCounters + +### GPU clock frequency impact + +The GPU idle time waiting for the CPU to take a counter sample can cause the +system DVFS power governor to decide that the GPU is not busy. In production +devices we commonly see that the GPU will be down-clocked during the +instrumented frame, which may have an impact on a subset of the available +performance counters. + +When running on a pre-production device we recommend pinning CPU, GPU, and bus +clock speeds to avoid the performance instability. + +## How do I use the layer? + +### Prerequisites + +Device setup steps: + +* Ensure your Android device is in developer mode, with `adb` support enabled + in developer settings. +* Ensure the Android device is connected to your development workstation, and + visible to `adb` with an authorized debug connection. 
+ +Application setup steps: + +* Build a debuggable build of your application and install it on the Android + device. + +Tooling setup steps: + +* Install the Android platform tools and ensure `adb` is on your `PATH` + environment variable. +* Install the Android NDK and set the `ANDROID_NDK_HOME` environment variable + to its installation path. + +### Layer build + +Build the Profile layer for Android using the provided build script, or using +equivalent manual commands, from the `layer_gpu_profile` directory. For full +instructions see the _Build an Android layer_ and _Build a Linux layer_ +sections in the [Build documentation](../docs/building.md). + +### Running using the layer + +You can configure a device to run a profile by using the Android helper utility +found in the root directory to configure the layer and manage the application. +You must enable the profile layer, and provide a configuration file to +parameterize it. + +```sh +python3 lgl_android_install.py --layer layer_gpu_profile --config --profile +``` + +The [`layer_config.json`](layer_config.json) file in this directory is a +template configuration file you can start from. It defaults to periodic +sampling every 600 frames, but you can modify this to suit your needs. + +The `--profile` option specifies an output directory on the host to contain +the CSV files written by the tool. One CSV is written for each frame, each CSV +containing a table with one row per workload profiled in the frame, listed +in API submit order. + +The Android helper utility contains many other options for configuring the +application under test and the capture process. For full instructions see the +[Running on Android documentation](../docs/running_android.md). + +## Layer configuration + +The current layer supports two `sample_mode` values: + +* `periodic_frame`: Sample every N frames. +* `frame_list`: Sample specific frames. 
+ +When `sample_mode` is `periodic_frame` the integer value of the `periodic_frame` key +defines the frame sampling period. The integer value of the +`periodic_min_frame` key defines the first possible frame that could be +profiled, allowing profiles to skip over any loading frames. By default frame 0 +is ignored. + +When `sample_mode` is `frame_list` the value of the `frame_list` key defines a list +of integers giving the specific frames to capture. + +## Layer counters + +The current layer uses a hard-coded set of performance counters defined in the +`Device` class constructor. If you wish to collect different counters you must +edit the [Device source](./source/device.cpp) and rebuild the layer. + +Any counters that are specified but that are not available on the current GPU +will be ignored. + +- - - + +_Copyright © 2025, Arm Limited and contributors._ diff --git a/layer_gpu_profile/android_build.sh b/layer_gpu_profile/android_build.sh new file mode 100644 index 0000000..6ec8c85 --- /dev/null +++ b/layer_gpu_profile/android_build.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MIT +# ---------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Configuration + +# Exit immediately if any component command errors +set -e + +BUILD_DIR_64=build_arm64 +BUILD_DIR_PACK=build_package + +# ---------------------------------------------------------------------------- +# Process command line options +if [ "$#" -lt 1 ]; then + BUILD_TYPE=Release +else + BUILD_TYPE=$1 +fi + +# Process command line options +if [ "$#" -lt 2 ]; then + PACKAGE=0 +else + PACKAGE=$2 +fi + +if [ "${PACKAGE}" -gt "0" ]; then + echo "Building a ${BUILD_TYPE} build with packaging" +else + echo "Building a ${BUILD_TYPE} build without packaging" +fi + +# ---------------------------------------------------------------------------- +# Build the 64-bit layer +mkdir -p ${BUILD_DIR_64} +pushd ${BUILD_DIR_64} + +cmake \ + -DCMAKE_SYSTEM_NAME=Android \ + -DANDROID_PLATFORM=29 \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_TOOLCHAIN=clang \ + -DANDROID_STL=c++_static \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ + -DCMAKE_WARN_DEPRECATED=OFF \ + .. 
+ +make -j16 + +popd + +# ---------------------------------------------------------------------------- +# Build the release package +if [ "${PACKAGE}" -gt "0" ]; then + # Setup the package directories + mkdir -p ${BUILD_DIR_PACK}/bin/android/arm64 + + # Install the 64-bit layer + cp ${BUILD_DIR_64}/source/*.so ${BUILD_DIR_PACK}/bin/android/arm64 +fi diff --git a/layer_gpu_profile/docs/developer-docs.md b/layer_gpu_profile/docs/developer-docs.md new file mode 100644 index 0000000..84bce10 --- /dev/null +++ b/layer_gpu_profile/docs/developer-docs.md @@ -0,0 +1,127 @@ +# Layer: GPU Profile - Developer Documentation + +This layer is used to profile Arm GPUs, providing API correlated performance +data. This page provides documentation for developers working on creating and +maintaining the layer. + +## Measuring performance + +Arm GPUs can run multiple workloads in parallel, if the application pipeline +barriers allow it. This is good for overall frame performance, but it makes +profiling data messy due to cross-talk between unrelated workloads. + +For profiling we therefore inject serialization points between workloads to +ensure that data corresponds to a single workload. Note that we can only +serialize within the current application process, so data could still be +perturbed by other processes using the GPU. + +### Sampling performance counters + +This layer will sample performance counters between each workload but, because +sampling is a CPU-side operation, it must trap back to the CPU to make the +counter sample. The correct way to implement this in Vulkan is to split the +application command buffer into multiple command buffers, each containing a +single workload. However, rewriting the command stream like this is expensive +in terms of CPU overhead caused by the state tracking. + +Instead we rely on an undocumented extension supported by Arm GPUs which +allows the CPU to set/wait on events in a submitted but not complete command +buffer. 
The layer injects a `vkCmdSetEvent(A)` and `vkCmdWaitEvent(B)` pair +between each workload in the command buffer, and then has the reverse +`vkWaitEvent(A)` and `vkSetEvent(B)` pair on the CPU side. The counter sample +can be inserted in between the two CPU-side operations. Note that there is no +blocking CPU-side wait for an event so `vkWaitEvent()` is really a polling loop +around `vkGetEventStatus()`. + +```mermaid +sequenceDiagram + actor CPU + actor GPU + CPU->>CPU: vkGetEventStatus(A) + Note over GPU: Run workload + GPU->>CPU: vkCmdSetEvent(A) + GPU->>GPU: vkCmdWaitEvent(B) + Note over CPU: Take sample + CPU->>GPU: vkSetEvent(B) + Note over GPU: Start next workload +``` + +### Performance implications + +Serializing workloads usually means that individual workloads will run with +lower completion latency, because they are no longer contending for resources. +However, loss of pipelining and overlap means that overall frame latency will +increase compared to a well overlapped scenario. + +In addition, serializing workloads and then trapping back to the CPU to +sample performance counters will cause the GPU to go idle waiting for the CPU +to complete the counter sample. This makes the GPU appear underutilized to the +system DVFS governor, which may subsequently decide to reduce the GPU clock +frequency. On pre-production devices we recommend locking CPU, GPU and memory +clock frequencies to avoid this problem. + +```mermaid +--- +displayMode: compact +--- +gantt + dateFormat x + axisFormat %Lms + section CPU + Sample: a1, 0, 2ms + Sample: a2, after w1, 2ms + section GPU + Workload 1:w1, after a1, 10ms + Workload 2:w2, after a2, 10ms +``` + +## Software architecture + +The basic architecture for this layer is an extension of the timeline layer, +using a layer command stream (LCS) recorded alongside each command buffer to +define the software operations that the layer needs to perform at submit time. 
+ +Because counter sampling is handled synchronously on the CPU when a frame is +being profiled, the layer handles each `vkQueueSubmit` and its associated +counter samples synchronously at submit time before returning to the +application. When sampling the layer retains the layer lock when calling into +the driver, ensuring that only one thread at a time can process a submit that +makes counter samples. + +## Event handling + +To implement this functionality, the layer allocates two `VkEvent` objects to +support the CPU<->GPU handover for counter sampling. These events are reset and +reused for all counter samples to avoid managing many different events. + +```c +CPU GPU +=== === + // Workload 1 + vkCmdSetEvent(A) +// Spin test until set +vkGetEventStatus(A) +vkResetEvent(A) + +// Sample counters + +vkSetEvent(B) + // Block until set + vkCmdWaitEvent(B) + vkCmdResetEvent(B) + + // Workload 2 +``` + +Due to an erratum in the interaction between the counter sampling and power +management in some older kernel driver versions, Arm GPUs with the CSF frontend +and a driver older than r54p0 need a sleep after successfully waiting on +event A and before sampling any counters. + +Initial investigations seem to show that the shortest reliable sleep is 3ms, so +this is quite a high overhead for applications with many workloads and +therefore should be enabled conditionally only for CSF GPUs with a driver older +than r54p0. 
+ +- - - +_Copyright © 2025, Arm Limited and contributors._ diff --git a/layer_gpu_profile/layer_config.json b/layer_gpu_profile/layer_config.json new file mode 100644 index 0000000..c24a31a --- /dev/null +++ b/layer_gpu_profile/layer_config.json @@ -0,0 +1,7 @@ +{ + "layer": "VK_LAYER_LGL_gpu_profile", + "sample_mode": "periodic_frame", + "periodic_min_frame": 1, + "periodic_frame": 600, + "frame_list": [] +} diff --git a/layer_gpu_profile/manifest.json b/layer_gpu_profile/manifest.json new file mode 100644 index 0000000..411a4ae --- /dev/null +++ b/layer_gpu_profile/manifest.json @@ -0,0 +1,11 @@ +{ + "file_format_version": "1.0.0", + "layer": { + "name": "VK_LAYER_LGL_gpu_profile", + "type": "INSTANCE", + "library_path": "libVkLayerGPUProfile.so", + "api_version": "1.0.0", + "implementation_version": "1", + "description": "Layer for generating Arm GPU profiling data" + } +} diff --git a/layer_gpu_profile/source/CMakeLists.txt b/layer_gpu_profile/source/CMakeLists.txt new file mode 100644 index 0000000..c0cb5f8 --- /dev/null +++ b/layer_gpu_profile/source/CMakeLists.txt @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: MIT +# ----------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- + +# Set output file names +if (CMAKE_BUILD_TYPE STREQUAL "Release") + set(VK_LAYER VkLayerGPUProfile_sym) + set(VK_LAYER_STRIP libVkLayerGPUProfile.so) +else() + set(VK_LAYER VkLayerGPUProfile) +endif() + +# Set strings used by configure +set(LGL_LAYER_NAME_STR "VK_LAYER_LGL_gpu_profile") +set(LGL_LAYER_DESC_STR "VkLayerGPUProfile by LGL") + +# Vulkan layer library +configure_file( + version.hpp.in + version.hpp + ESCAPE_QUOTES @ONLY) + +add_library( + ${VK_LAYER} SHARED + ../../source_common/framework/entry.cpp + device.cpp + instance.cpp + layer_comms.cpp + layer_config.cpp + layer_device_functions_command_buffer.cpp + layer_device_functions_command_pool.cpp + layer_device_functions_debug.cpp + layer_device_functions_dispatch.cpp + layer_device_functions_queue.cpp + layer_device_functions_render_pass.cpp + layer_device_functions_trace_rays.cpp + layer_device_functions_transfer.cpp + submit_visitor.cpp) + +target_include_directories( + ${VK_LAYER} PRIVATE + ./ + ../../source_common/ + ${CMAKE_CURRENT_BINARY_DIR}) + +target_include_directories( + ${VK_LAYER} SYSTEM PRIVATE + ../../source_third_party/ + ../../source_third_party/khronos/vulkan/include/ + ../../source_third_party/khronos/vulkan-utilities/include/ + ../../source_third_party/libGPUCounters/backend/device/include/ + ../../source_third_party/libGPUCounters/hwcpipe/include/ + ../../source_third_party/protopuf/include/) + +# We use libGPUCounters in 
the device structure, so add to framework includes +target_include_directories( + lib_layer_framework SYSTEM PRIVATE + ../../source_third_party/libGPUCounters/backend/device/include/ + ../../source_third_party/libGPUCounters/hwcpipe/include/) + +lgl_set_build_options(${VK_LAYER}) + +target_link_libraries( + ${VK_LAYER} + lib_layer_comms + lib_layer_framework + lib_layer_trackers + device + hwcpipe + $<$:log>) + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + add_custom_command( + TARGET "${VK_LAYER}" POST_BUILD + COMMAND ${CMAKE_STRIP} + ARGS --strip-all -o ${VK_LAYER_STRIP} $ + COMMENT "Stripped lib${VK_LAYER}.so to ${VK_LAYER_STRIP}") +endif() + +add_clang_tools() diff --git a/layer_gpu_profile/source/device.cpp b/layer_gpu_profile/source/device.cpp new file mode 100644 index 0000000..b040669 --- /dev/null +++ b/layer_gpu_profile/source/device.cpp @@ -0,0 +1,226 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ +#include +#include +#include + +#include + +#include "comms/comms_module.hpp" +#include "device.hpp" +#include "framework/manual_functions.hpp" +#include "framework/utils.hpp" +#include "instance.hpp" + +using json = nlohmann::json; + +/** + * @brief The dispatch lookup for all of the created Vulkan devices. + */ +static std::unordered_map> g_devices; + +/* See header for documentation. */ +const std::vector Device::createInfoPatches {}; + +/* See header for documentation. */ +std::unique_ptr Device::commsModule; + +/* See header for documentation. */ +std::unique_ptr Device::commsWrapper; + +/* See header for documentation. */ +void Device::store(VkDevice handle, std::unique_ptr device) +{ + void* key = getDispatchKey(handle); + g_devices.insert({key, std::move(device)}); +} + +/* See header for documentation. */ +Device* Device::retrieve(VkDevice handle) +{ + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve(VkQueue handle) +{ + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +Device* Device::retrieve(VkCommandBuffer handle) +{ + void* key = getDispatchKey(handle); + assert(isInMap(key, g_devices)); + return g_devices.at(key).get(); +} + +/* See header for documentation. */ +void Device::destroy(Device* device) +{ + g_devices.erase(getDispatchKey(device)); +} + +/* See header for documentation. 
*/ +Device::Device(Instance* _instance, + VkPhysicalDevice _physicalDevice, + VkDevice _device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress, + const VkDeviceCreateInfo& createInfo) + : instance(_instance), + physicalDevice(_physicalDevice), + device(_device) +{ + UNUSED(createInfo); + + initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver); + + // Emit a log if debug utils entry points did not load. In this scenario + // the layer will still be loaded and send metadata packets to the server + // socket, but the Perfetto data will not contain any tag labels. We will + // therefore be unable to cross-reference the two data streams to produce a + // usable visualization. + if (!driver.vkCmdBeginDebugUtilsLabelEXT) + { + LAYER_LOG(" - ERROR: Device does not expose VK_EXT_debug_utils"); + LAYER_LOG(" Profiling will not contain debug labels"); + } + + // Init the shared comms module for the first device built + if (!commsModule) + { + commsModule = std::make_unique("lglcomms"); + commsWrapper = std::make_unique(*commsModule); + } + + // Create events for CPU<>GPU synchronization + VkEventCreateInfo eventCreateInfo { + .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + + auto result = driver.vkCreateEvent(device, &eventCreateInfo, nullptr, &gpuToCpuEvent); + if (result != VK_SUCCESS) + { + LAYER_ERR("Failed vkCreateEvent() for gpu->cpu synchronization"); + } + + result = driver.vkCreateEvent(device, &eventCreateInfo, nullptr, &cpuToGpuEvent); + if (result != VK_SUCCESS) + { + LAYER_ERR("Failed vkCreateEvent() for cpu->gpu synchronization"); + } + + // Create the counter context + lgcGpu = std::make_unique(0); + if (!lgcGpu->valid()) + { + LAYER_ERR("Failed libGPUCounters GPU context creation"); + return; + } + + // Create the counter sampler config + auto config = hwcpipe::sampler_config(*lgcGpu.get()); + + LAYER_LOG("Configuring libGPUCounters:"); + + // Queue cycles, not all of these are available on all GPUs - the 
ones that + // are not available will be transparently dropped + addCounter(config, MaliCompQueueActiveCy, "Compute queue active cycles"); + addCounter(config, MaliVertQueueActiveCy, "Vertex queue active cycles"); + addCounter(config, MaliBinningQueueActiveCy, "Binning phase queue active cycles"); + addCounter(config, MaliNonFragQueueActiveCy, "Non-fragment queue active cycles"); + addCounter(config, MaliFragQueueActiveCy, "Fragment queue active cycles"); + addCounter(config, MaliMainQueueActiveCy, "Main phase queue active cycles"); + + // External bandwidth + addCounter(config, MaliExtBusRdBy, "External read bytes"); + addCounter(config, MaliExtBusWrBy, "External write bytes"); + + // Primitive counts + addCounter(config, MaliGeomTotalPrim, "Input primitives"); + addCounter(config, MaliGeomVisiblePrim, "Visible primitives"); + + // Thread counts + addCounter(config, MaliNonFragThread, "Non-fragment threads"); + addCounter(config, MaliFragThread, "Fragment threads"); + + // Functional unit counters + // TODO HIVE-1307: Currently libGPUCounters doesn't expose a MaliALUIssueCy + // counter, so we use instruction counts as a measure of relative + // arithmetic complexity across workloads, but note that it is not directly + // comparable with the other "* unit cycles" counters. 
+ addCounter(config, MaliEngInstr, "Arithmetic unit instructions"); + addCounter(config, MaliEngFMAInstr, "Arithmetic unit FMA instructions"); + addCounter(config, MaliEngCVTInstr, "Arithmetic unit CVT instructions"); + addCounter(config, MaliEngSFUInstr, "Arithmetic unit SFU instructions"); + addCounter(config, MaliVarIssueCy, "Varying unit cycles"); + addCounter(config, MaliTexIssueCy, "Texture unit cycles"); + addCounter(config, MaliLSIssueCy, "Load/store unit cycles"); + + // Create the counter sampler and set it running + lgcSampler = std::make_unique>(config); + auto ec = lgcSampler->start_sampling(); + if (ec) + { + LAYER_ERR("Failed libGPUCounters GPU sampler creation"); + } + + // Configure frame selection here so we can profile frame zero + isFrameOfInterest = instance->config.isFrameOfInterest(0); + + // Start the next frame if it is "of interest" + if (isFrameOfInterest) + { + json startFrameMessage { + { "type", "start_frame" }, + { "frame", 0 }, + }; + + txMessage(startFrameMessage.dump()); + } +} + +void Device::addCounter( + hwcpipe::sampler_config& config, + hwcpipe_counter counterID, + const char* counterName +) { + auto ec = config.add_counter(counterID); + if (ec) + { + LAYER_LOG(" - %s not available", counterName); + } + else + { + LAYER_LOG(" + %s selected", counterName); + lgcActiveCounters.emplace_back(counterID, counterName); + } +} diff --git a/layer_gpu_profile/source/device.hpp b/layer_gpu_profile/source/device.hpp new file mode 100644 index 0000000..b52a47a --- /dev/null +++ b/layer_gpu_profile/source/device.hpp @@ -0,0 +1,249 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to 
use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Declares the root class for layer management of VkDevice objects. + * + * Role summary + * ============ + * + * Devices represent the core context used by the application to connect to the + * underlying graphics driver. A device object is the dispatch root for the + * Vulkan driver, so device commands all take some form of dispatchable handle + * that can be resolved into a unique per-device key. For the driver this key + * would simply be a pointer directly to the driver-internal device object, but + * for our layer we use a device dispatch key as an index in to the map to find + * the layer's driver object. + * + * Key properties + * ============== + * + * Vulkan devices are designed to be used concurrently by multiple application + * threads. An application can have multiple concurrent devices, and use each + * device from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. 
+ * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. + */ + +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include + +#include "layer_comms.hpp" +#include "comms/comms_module.hpp" +#include "framework/device_dispatch_table.hpp" +#include "instance.hpp" +#include "trackers/device.hpp" + +/** + * @brief Function pointer type for patching VkDeviceCreateInfo. + */ +using DeviceCreatePatchPtr = void (*)(Instance& instance, + VkPhysicalDevice physicalDevice, + vku::safe_VkDeviceCreateInfo& createInfo, + std::vector& supported); + +/** + * @brief This class implements the layer state tracker for a single device. + */ +class Device +{ +public: + /** + * @brief Store a new device into the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect key. + * @param device The @c Device object to store. + */ + static void store(VkDevice handle, std::unique_ptr device); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable device handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve(VkDevice handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable queue handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve(VkQueue handle); + + /** + * @brief Fetch a device from the global store of dispatchable devices. + * + * @param handle The dispatchable command buffer handle to use as an indirect lookup. + * + * @return The layer device context. + */ + static Device* retrieve(VkCommandBuffer handle); + + /** + * @brief Drop a device from the global store of dispatchable devices. 
+ * + * @param device The device to drop. + */ + static void destroy(Device* device); + + /** + * @brief Create a new layer device object. + * + * Create info is transient, so the constructor must copy what it needs. + * + * @param instance The layer instance object this device is created with. + * @param physicalDevice The physical device this logical device is for. + * @param device The device handle this device is created with. + * @param nlayerGetProcAddress The vkGetDeviceProcAddress function for the driver. + * @param createInfo The create info used to create the device. + */ + Device(Instance* instance, + VkPhysicalDevice physicalDevice, + VkDevice device, + PFN_vkGetDeviceProcAddr nlayerGetProcAddress, + const VkDeviceCreateInfo& createInfo); + + /** + * @brief Destroy this layer device object. + */ + ~Device() = default; + + /** + * @brief Callback for sending some message for the device. + * + * @param message The message to send. + */ + void txMessage(const std::string& message) + { + commsWrapper->txMessage(message); + } + + /** + * @brief Get the cumulative stats for this device. + */ + Tracker::Device& getStateTracker() { return stateTracker; } + +private: + + /** + * @brief Add a counter to the list of counters, if available. + * + * @param samplerConfig The sampler configuration to query. + * @param counterID The counter to add to the configuration. + * @param counterName The human-readable counter name. + */ + void addCounter( + hwcpipe::sampler_config& samplerConfig, + hwcpipe_counter counterID, + const char* counterName); + +public: + /** + * @brief The instance this device is created with. + */ + const Instance* instance; + + /** + * @brief The physical device this device is created with. + */ + const VkPhysicalDevice physicalDevice; + + /** + * @brief The device handle this device is created with. + */ + const VkDevice device; + + /** + * @brief The driver function dispatch table. 
+ */ + DeviceDispatchTable driver {}; + + /** + * @brief The set of VkCreateDeviceInfo patches needed by this layer. + */ + static const std::vector createInfoPatches; + + /** + * @brief Is this frame being profiled? + */ + bool isFrameOfInterest {false}; + + /** + * @brief The event needed to sync execution from GPU to CPU. + */ + VkEvent gpuToCpuEvent; + + /** + * @brief The event needed to sync execution from CPU back to GPU. + */ + VkEvent cpuToGpuEvent; + + /** + * @brief The GPU connection for counter sampling. + */ + std::unique_ptr lgcGpu; + + /** + * @brief The GPU sampler for counter sampling. + */ + std::unique_ptr> lgcSampler; + + /** + * @brief The active GPU counters for sampling. + */ + std::vector> lgcActiveCounters; + +private: + /** + * @brief State tracker for this device. + */ + Tracker::Device stateTracker; + + /** + * @brief Shared network communications module. + */ + static std::unique_ptr commsModule; + + /** + * @brief Shared network communications message encoder. 
+ */ + static std::unique_ptr commsWrapper; +}; diff --git a/layer_gpu_profile/source/device_utils.hpp b/layer_gpu_profile/source/device_utils.hpp new file mode 100644 index 0000000..f0b3415 --- /dev/null +++ b/layer_gpu_profile/source/device_utils.hpp @@ -0,0 +1,149 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include "device.hpp" + +#include +#include + +#include + +/** + * @brief Emit workaround sleep if needed. + */ +[[maybe_unused]] static void workaroundDelay() +{ + // We could make this conditional (enable if GPU is CSF and DDK < r54p0). + // However the profile is always going to be invasive, and it's quite a bit + // of added complexity to handle. 
+ std::this_thread::sleep_for(std::chrono::milliseconds(3)); +} + +/** + * @brief Emit the GPU-side trigger/wait for a CPU-side trap. + * + * Note: this relies on an undocumented extension supported by Arm GPUs, which + * allows the CPU to set/wait/reset events in a command buffer after it has + * been submitted to a queue. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + */ +[[maybe_unused]] static void emitCPUTrap( + Device& layer, + VkCommandBuffer commandBuffer +) { + // Don't instrument outside of active frame of interest + if(!layer.isFrameOfInterest) + { + return; + } + + // Signal the gpuToCpu to wake the CPU to perform its operation + layer.driver.vkCmdSetEvent( + commandBuffer, + layer.gpuToCpuEvent, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); + + // Wait for cpuToGpu to wake the GPU after CPU has finished + layer.driver.vkCmdWaitEvents( + commandBuffer, + 1, + &layer.cpuToGpuEvent, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, nullptr, + 0, nullptr, + 0, nullptr); + + // Reset cpuToGpu so it's ready to use again + layer.driver.vkCmdResetEvent( + commandBuffer, + layer.cpuToGpuEvent, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); +} + +/** + * @brief Perform the CPU-side wait for a CPU-side trap. + * + * Note: this relies on an undocumented extension supported by Arm GPUs, which + * allows the CPU to set/wait/reset events in a command buffer after it has + * been submitted to a queue. + * + * @param layer The layer context for the device. 
+ */ +[[maybe_unused]] static void waitForGPU( + Device& layer +) { + // Wait for gpuToCpu to wake the CPU after GPU has finished + while(true) + { + auto res = layer.driver.vkGetEventStatus(layer.device, layer.gpuToCpuEvent); + if (res == VK_EVENT_SET) + { + break; + } + + if (res != VK_EVENT_RESET) + { + LAYER_LOG("Failed to wait for gpuToCpuEvent"); + } + + // Sleep before polling again + std::this_thread::sleep_for(std::chrono::microseconds(100)); + } + + // Reset gpuToCpu so it's ready to use again + auto res = layer.driver.vkResetEvent(layer.device, layer.gpuToCpuEvent); + if (res != VK_SUCCESS) + { + LAYER_LOG("Failed to reset gpuToCpuEvent"); + } + + // Sleep after event set to workaround counter sync errata on older drivers + workaroundDelay(); +} + +/** + * @brief Perform the CPU-side notify of the GPU after a CPU-side trap. + * + * Note: this relies on an undocumented extension supported by Arm GPUs, which + * allows the CPU to set/wait/reset events in a command buffer after it has + * been submitted to a queue. + * + * @param layer The layer context for the device. 
+ */ +[[maybe_unused]] static void notifyGPU( + Device& layer +) { + // Signal cpuToGpu to wake the GPU to keep processing the command stream + auto res = layer.driver.vkSetEvent(layer.device, layer.cpuToGpuEvent); + if (res != VK_SUCCESS) + { + LAYER_LOG("Failed to notify cpuToGpuEvent"); + } +} diff --git a/layer_gpu_profile/source/instance.cpp b/layer_gpu_profile/source/instance.cpp new file mode 100644 index 0000000..d567bbb --- /dev/null +++ b/layer_gpu_profile/source/instance.cpp @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include "instance.hpp" + +#include "framework/utils.hpp" + +#include + +/** + * @brief The dispatch lookup for all of the created Vulkan instances. 
+ */ +static std::unordered_map> g_instances; + +/* See header for documentation. */ +const APIVersion Instance::minAPIVersion {1, 1}; + +/* See header for documentation. */ +const std::vector Instance::extraExtensions { + VK_EXT_DEBUG_UTILS_EXTENSION_NAME, +}; + +/* See header for documentation. */ +void Instance::store(VkInstance handle, std::unique_ptr& instance) +{ + void* key = getDispatchKey(handle); + g_instances.insert({key, std::move(instance)}); +} + +/* See header for documentation. */ +Instance* Instance::retrieve(VkInstance handle) +{ + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +Instance* Instance::retrieve(VkPhysicalDevice handle) +{ + void* key = getDispatchKey(handle); + assert(isInMap(key, g_instances)); + return g_instances.at(key).get(); +} + +/* See header for documentation. */ +void Instance::destroy(Instance* instance) +{ + g_instances.erase(getDispatchKey(instance->instance)); +} + +/* See header for documentation. 
*/ +Instance::Instance(VkInstance _instance, PFN_vkGetInstanceProcAddr _nlayerGetProcAddress) + : instance(_instance), + nlayerGetProcAddress(_nlayerGetProcAddress) +{ + initDriverInstanceDispatchTable(instance, nlayerGetProcAddress, driver); +} diff --git a/layer_gpu_profile/source/instance.hpp b/layer_gpu_profile/source/instance.hpp new file mode 100644 index 0000000..878c84c --- /dev/null +++ b/layer_gpu_profile/source/instance.hpp @@ -0,0 +1,143 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file + * Declares the root class for layer management of VkInstance objects. 
 + * + * Role summary + * ============ + * + * Instances represent the core context used by the application to connect to + * the OS graphics subsystem prior to connection to a specific device instance. + * An instance object is the dispatch root for the Vulkan subsystem, so + * instance commands all take some form of dispatchable handle that can be + * resolved into a unique per-instance key. For the driver this key would + * simply be a pointer directly to the driver-internal instance object, but for + * our layer we use an instance dispatch key as an index into the map to find + * the layer's instance object. + * + * Key properties + * ============== + * + * Vulkan instances are designed to be used concurrently by multiple + * application threads. An application can have multiple concurrent instances, + * and use each instance from multiple threads. + * + * Access to the layer driver structures must therefore be kept thread-safe. + * For sake of simplicity, we generally implement this by: + * - Holding a global lock whenever any thread is inside layer code. + * - Releasing the global lock whenever the layer calls a driver function. + */ + +#pragma once + +#include "framework/instance_dispatch_table.hpp" +#include "layer_config.hpp" + +#include +#include + +#include +#include + +/** + * @brief This class implements the layer state tracker for a single instance. + */ +class Instance +{ +public: + /** + * @brief Store a new instance into the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect key. + * @param instance The @c Instance object to store. + */ + static void store(VkInstance handle, std::unique_ptr& instance); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable instance handle to use as an indirect lookup. + * + * @return The layer instance context. 
+ */ + static Instance* retrieve(VkInstance handle); + + /** + * @brief Fetch an instance from the global store of dispatchable instances. + * + * @param handle The dispatchable physical device handle to use as an indirect lookup. + * + * @return The layer instance context. + */ + static Instance* retrieve(VkPhysicalDevice handle); + + /** + * @brief Drop an instance from the global store of dispatchable instances. + * + * @param instance The instance to drop. + */ + static void destroy(Instance* instance); + + /** + * @brief Create a new layer instance object. + * + * @param instance The instance handle this instance is created with. + * @param nlayerGetProcAddress The vkGetProcAddress function in the driver/next layer down. + */ + Instance(VkInstance instance, PFN_vkGetInstanceProcAddr nlayerGetProcAddress); + +public: + /** + * @brief The instance handle this instance is created with. + */ + VkInstance instance; + + /** + * @brief The next layer's \c vkGetInstanceProcAddr() function pointer. + */ + PFN_vkGetInstanceProcAddr nlayerGetProcAddress; + + /** + * @brief The driver function dispatch table. + */ + InstanceDispatchTable driver {}; + + /** + * @brief The layer configuration. + */ + const LayerConfig config; + + /** + * @brief The minimum API version needed by this layer. + */ + static const APIVersion minAPIVersion; + + /** + * @brief The minimum set of instance extensions needed by this layer. 
+ */ + static const std::vector extraExtensions; +}; diff --git a/layer_gpu_profile/source/layer_comms.cpp b/layer_gpu_profile/source/layer_comms.cpp new file mode 100644 index 0000000..98740ac --- /dev/null +++ b/layer_gpu_profile/source/layer_comms.cpp @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include + +#include "layer_comms.hpp" + + +/* See header for documentation. */ +ProfileComms::ProfileComms(Comms::CommsInterface& _comms) + : comms(_comms) +{ + if (comms.isConnected()) + { + endpoint = comms.getEndpointID("GPUProfile"); + } +} + +/* See header for documentation. 
*/ +void ProfileComms::txMessage(const std::string& message) +{ + // Message endpoint is not available + if (endpoint == 0) + { + return; + } + + auto data = std::make_unique(message.begin(), message.end()); + comms.txAsync(endpoint, std::move(data)); +} diff --git a/layer_gpu_profile/source/layer_comms.hpp b/layer_gpu_profile/source/layer_comms.hpp new file mode 100644 index 0000000..1d79d7d --- /dev/null +++ b/layer_gpu_profile/source/layer_comms.hpp @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Declares a simple comms encoder for the profile layer. 
+ */ + +#pragma once + +#include "comms/comms_interface.hpp" + +/** + * @brief A simple message encoder for the profile layer comms endpoint. + */ +class ProfileComms +{ +public: + /** + * @brief Construct a new encoder. + * + * @param comms The common comms module used by all services. + */ + ProfileComms(Comms::CommsInterface& comms); + + /** + * @brief Send a message to the GPU profile endpoint service. + * + * @param message The message to send. + */ + void txMessage(const std::string& message); + +private: + /** + * @brief The endpoint ID of the service, or 0 if not found. + */ + Comms::EndpointID endpoint {0}; + + /** + * @brief The common module for network messaging. + */ + Comms::CommsInterface& comms; +}; diff --git a/layer_gpu_profile/source/layer_config.cpp b/layer_gpu_profile/source/layer_config.cpp new file mode 100644 index 0000000..1154401 --- /dev/null +++ b/layer_gpu_profile/source/layer_config.cpp @@ -0,0 +1,148 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file + * Defines a config file to parameterize the layer. + */ + +#include "layer_config.hpp" + +#include "framework/utils.hpp" +#include "utils/misc.hpp" +#include "version.hpp" + +#include +#include +#include + +#include + +/* See header for documentation. */ +void LayerConfig::parseSamplingOptions(const json& config) +{ + // Decode top level options + std::string rawMode = config.at("sample_mode"); + + if (rawMode == "disabled") + { + mode = MODE_DISABLED; + } + else if (rawMode == "periodic_frame") + { + mode = MODE_PERIODIC_FRAME; + periodicFrame = config.at("periodic_frame"); + periodicMinFrame = config.at("periodic_min_frame"); + } + else if (rawMode == "frame_list") + { + mode = MODE_FRAME_LIST; + specificFrames = config.at("frame_list").get>(); + } + else + { + LAYER_ERR("Unknown counter sample_mode: %s", rawMode.c_str()); + rawMode = "disabled"; + } + + LAYER_LOG("Layer sampling configuration"); + LAYER_LOG("============================"); + LAYER_LOG(" - Sample mode: %s", rawMode.c_str()); + + if (mode == MODE_PERIODIC_FRAME) + { + LAYER_LOG(" - Frame period: %" PRIu64, periodicFrame); + LAYER_LOG(" - Minimum frame: %" PRIu64, periodicMinFrame); + } + else if (mode == MODE_FRAME_LIST) + { + std::stringstream result; + std::copy(specificFrames.begin(), specificFrames.end(), std::ostream_iterator(result, " ")); + LAYER_LOG(" - Frames: %s", result.str().c_str()); + } +} + +/* See header for documentation. 
*/ +LayerConfig::LayerConfig() +{ +#ifdef __ANDROID__ + std::string fileName("/data/local/tmp/"); + fileName.append(LGL_LAYER_CONFIG); +#else + std::string fileName(LGL_LAYER_CONFIG); +#endif + + LAYER_LOG("Trying to read config: %s", fileName.c_str()); + + std::ifstream stream(fileName); + if (!stream) + { + LAYER_LOG("Failed to open layer config, using defaults"); + return; + } + + json data; + + try + { + data = json::parse(stream); + } + catch (const json::parse_error& e) + { + LAYER_ERR("Failed to load layer config, using defaults"); + LAYER_ERR("Error: %s", e.what()); + return; + } + + try + { + parseSamplingOptions(data); + } + catch (const json::out_of_range& e) + { + LAYER_ERR("Failed to read feature config, using defaults"); + LAYER_ERR("Error: %s", e.what()); + } +} + +/* See header for documentation. */ +bool LayerConfig::isFrameOfInterest( + uint64_t frameID +) const { + switch(mode) + { + case MODE_DISABLED: + return false; + case MODE_PERIODIC_FRAME: + return (frameID >= periodicMinFrame) && + ((frameID % periodicFrame) == 0); + case MODE_FRAME_LIST: + return isIn(frameID, specificFrames); + } + + // Should never reach here + return false; +} + diff --git a/layer_gpu_profile/source/layer_config.hpp b/layer_gpu_profile/source/layer_config.hpp new file mode 100644 index 0000000..2c54cb3 --- /dev/null +++ b/layer_gpu_profile/source/layer_config.hpp @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file + * Declares a config file to parameterize the layer. + */ + +#pragma once + +#include +using json = nlohmann::json; + +/** + * @brief This class implements a config interface for this layer. + * + * The layer contains a default config, but users can provide a JSON config + * file on the file system which is loaded at init time. + * - On Android the file is loaded from /data/local/tmp. + * - On Linux the file is loaded from the current working directory. + */ +class LayerConfig +{ +public: + /** + * @brief Create a new layer config. + */ + LayerConfig(); + + /** + * @brief Test if next frame should be profiled. + * + * @param frameID The index of the next frame. + * + * @return True if profiling should be enabled, False otherwise. + */ + bool isFrameOfInterest(uint64_t frameID) const; + +private: + /** + * @brief Supported sampling modes. + */ + enum SamplingMode + { + MODE_DISABLED, + MODE_FRAME_LIST, + MODE_PERIODIC_FRAME + }; + + /** + * @brief Parse the configuration options for the sampling module. + * + * @param config The JSON configuration. + * + * @throws json::out_of_range if required fields are missing. + */ + void parseSamplingOptions(const json& config); + + /** + * @brief The sampling mode. 
+ */ + SamplingMode mode {MODE_DISABLED}; + + /** + * @brief The sampling period in frames, or 0 if disabled. + */ + uint64_t periodicFrame {0}; + + /** + * @brief The minimum frame to sample (inclusive). + */ + uint64_t periodicMinFrame {0}; + + /** + * @brief The sampling frame list, or empty if disabled. + */ + std::vector specificFrames; +}; diff --git a/layer_gpu_profile/source/layer_device_functions.hpp b/layer_gpu_profile/source/layer_device_functions.hpp new file mode 100644 index 0000000..ff9781d --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions.hpp @@ -0,0 +1,413 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include + +// Functions for command pools + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool(VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool(VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator); + +// Functions for command buffers + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL + layer_vkAllocateCommandBuffers(VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult layer_vkBeginCommandBuffer(VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands(VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer(VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers(VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +// Functions for render passes + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass(VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2(VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR(VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass(VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering(VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR(VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass(VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass2(VkCommandBuffer commandBuffer, + const VkSubpassEndInfo* pSubpassEndInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass2KHR(VkCommandBuffer commandBuffer, + const VkSubpassEndInfo* pSubpassEndInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering(VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR(VkCommandBuffer commandBuffer); + +// Functions for compute dispatches + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch(VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase(VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR(VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset); + +// Commands for trace rays + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdTraceRaysKHR(VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth); + +// Commands for acceleration structure builds + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresIndirectKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkDeviceAddress* pIndirectDeviceAddresses, + const uint32_t* pIndirectStrides, + const uint32_t* const* ppMaxPrimitiveCounts); + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos); + +// Commands for transfers + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage(VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage(VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo); + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyBufferToImage2(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureInfoKHR* pInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo); + +// Functions for debug + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer); + +// Functions for queues + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL + layer_vkQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL + layer_vkQueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR(VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence); diff --git a/layer_gpu_profile/source/layer_device_functions_command_buffer.cpp b/layer_gpu_profile/source/layer_device_functions_command_buffer.cpp new file mode 100644 index 0000000..de0f4f4 --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions_command_buffer.cpp @@ -0,0 +1,152 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * ----------------------------------------------------------------------------
+ */
+
+#include "device.hpp"
+#include "framework/device_dispatch_table.hpp"
+
+#include <mutex>
+
+extern std::mutex g_vulkanLock;
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL
+ layer_vkAllocateCommandBuffers(VkDevice device,
+ const VkCommandBufferAllocateInfo* pAllocateInfo,
+ VkCommandBuffer* pCommandBuffers)
+{
+ LAYER_TRACE(__func__);
+
+ // Hold the lock to access layer-wide global store
+ std::unique_lock lock {g_vulkanLock};
+ auto* layer = Device::retrieve(device);
+
+ // Release the lock to call into the driver
+ lock.unlock();
+ VkResult result = layer->driver.vkAllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
+ if (result != VK_SUCCESS)
+ {
+ return result;
+ }
+
+ // Retake the lock to access layer-wide global store
+ lock.lock();
+ auto& tracker = layer->getStateTracker();
+ for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++)
+ {
+ tracker.allocateCommandBuffer(pAllocateInfo->commandPool, pCommandBuffers[i]);
+ }
+
+ return result;
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult layer_vkBeginCommandBuffer(VkCommandBuffer commandBuffer,
+ const VkCommandBufferBeginInfo* pBeginInfo)
+{
+ // Hold the lock to access layer-wide global store
+ std::unique_lock lock {g_vulkanLock};
+ auto* layer = Device::retrieve(commandBuffer);
+
+ auto& tracker = layer->getStateTracker();
+ auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer);
+ cmdBuffer.reset();
+ cmdBuffer.begin(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
+
+ // Release the lock to call into the driver
+ lock.unlock();
+ return layer->driver.vkBeginCommandBuffer(commandBuffer, pBeginInfo);
+}
+
+/* See Vulkan API for documentation.
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandBuffer(VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& cmdBuffer = tracker.getCommandBuffer(commandBuffer); + cmdBuffer.reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandBuffer(commandBuffer, flags); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkFreeCommandBuffers(VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + for (uint32_t i = 0; i < commandBufferCount; i++) + { + tracker.freeCommandBuffer(commandPool, pCommandBuffers[i]); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkFreeCommandBuffers(device, commandPool, commandBufferCount, pCommandBuffers); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdExecuteCommands(VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store and device-wide data + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + auto& tracker = layer->getStateTracker(); + auto& primary = tracker.getCommandBuffer(commandBuffer); + + for (uint32_t i = 0; i < commandBufferCount; i++) + { + auto& secondary = tracker.getCommandBuffer(pCommandBuffers[i]); + primary.executeCommands(secondary); + } + + // Release the lock to call into the main driver + lock.unlock(); + layer->driver.vkCmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers); +} diff --git a/layer_gpu_profile/source/layer_device_functions_command_pool.cpp b/layer_gpu_profile/source/layer_device_functions_command_pool.cpp new file mode 100644 index 0000000..4beb9c7 --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions_command_pool.cpp @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include "device.hpp"
+#include "framework/device_dispatch_table.hpp"
+
+#include <mutex>
+
+extern std::mutex g_vulkanLock;
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateCommandPool(VkDevice device,
+ const VkCommandPoolCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkCommandPool* pCommandPool)
+{
+ LAYER_TRACE(__func__);
+
+ // Hold the lock to access layer-wide global store
+ std::unique_lock lock {g_vulkanLock};
+ auto* layer = Device::retrieve(device);
+
+ // Release the lock to call into the driver
+ lock.unlock();
+ VkResult result = layer->driver.vkCreateCommandPool(device, pCreateInfo, pAllocator, pCommandPool);
+ if (result != VK_SUCCESS)
+ {
+ return result;
+ }
+
+ // Retake the lock to access layer-wide global store
+ lock.lock();
+ auto& tracker = layer->getStateTracker();
+ tracker.createCommandPool(*pCommandPool);
+ return result;
+}
+
+/* See Vulkan API for documentation.
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkResetCommandPool(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.getCommandPool(commandPool).reset(); + + // Release the lock to call into the driver + lock.unlock(); + return layer->driver.vkResetCommandPool(device, commandPool, flags); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyCommandPool(VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyCommandPool(commandPool); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyCommandPool(device, commandPool, pAllocator); +} diff --git a/layer_gpu_profile/source/layer_device_functions_debug.cpp b/layer_gpu_profile/source/layer_device_functions_debug.cpp new file mode 100644 index 0000000..f975b38 --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions_debug.cpp @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, 
 subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include "device.hpp"
+#include "framework/device_dispatch_table.hpp"
+
+#include <mutex>
+
+extern std::mutex g_vulkanLock;
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer,
+ const VkDebugMarkerMarkerInfoEXT* pMarkerInfo)
+{
+ LAYER_TRACE(__func__);
+
+ // Hold the lock to access layer-wide global store
+ std::unique_lock lock {g_vulkanLock};
+ auto* layer = Device::retrieve(commandBuffer);
+
+ // Only instrument inside active frame of interest
+ if(layer->isFrameOfInterest)
+ {
+ auto& tracker = layer->getStateTracker();
+ auto& cb = tracker.getCommandBuffer(commandBuffer);
+
+ // Push the label scope to the tracker
+ cb.debugMarkerBegin(pMarkerInfo->pMarkerName);
+ }
+
+ // ... and forward to the driver
+ lock.unlock();
+ layer->driver.vkCmdDebugMarkerBeginEXT(commandBuffer, pMarkerInfo);
+}
+
+/* See Vulkan API for documentation.
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Only instrument inside active frame of interest + if(layer->isFrameOfInterest) + { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Pop the label scope in the tracker + cb.debugMarkerEnd(); + } + + // ... and forward to the driver + lock.unlock(); + layer->driver.vkCmdDebugMarkerEndEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT(VkCommandBuffer commandBuffer, + const VkDebugUtilsLabelEXT* pLabelInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Only instrument inside active frame of interest + if(layer->isFrameOfInterest) + { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Push the label scope to the tracker + cb.debugMarkerBegin(pLabelInfo->pLabelName); + } + + // ... and forward to the driver + lock.unlock(); + layer->driver.vkCmdBeginDebugUtilsLabelEXT(commandBuffer, pLabelInfo); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT(VkCommandBuffer commandBuffer) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Only instrument inside active frame of interest + if(layer->isFrameOfInterest) + { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + // Pop the label scope in the tracker + cb.debugMarkerEnd(); + } + + // ... and forward to the driver + lock.unlock(); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_profile/source/layer_device_functions_dispatch.cpp b/layer_gpu_profile/source/layer_device_functions_dispatch.cpp new file mode 100644 index 0000000..70c59f5 --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions_dispatch.cpp @@ -0,0 +1,158 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include "device.hpp"
+#include "device_utils.hpp"
+#include "framework/device_dispatch_table.hpp"
+
+#include <mutex>
+
+extern std::mutex g_vulkanLock;
+
+/**
+ * @brief Register a compute dispatch with the tracker.
+ *
+ * @param layer The layer context for the device.
+ * @param commandBuffer The command buffer we are recording.
+ * @param groupX The X size of the dispatch in groups.
+ * @param groupY The Y size of the dispatch in groups.
+ * @param groupZ The Z size of the dispatch in groups.
+ */
+static void registerDispatch(Device* layer,
+ VkCommandBuffer commandBuffer,
+ int64_t groupX,
+ int64_t groupY,
+ int64_t groupZ)
+{
+ if (!layer->isFrameOfInterest)
+ {
+ return;
+ }
+
+ auto& tracker = layer->getStateTracker();
+ auto& cb = tracker.getCommandBuffer(commandBuffer);
+ cb.dispatch(groupX, groupY, groupZ);
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatch(VkCommandBuffer commandBuffer,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ LAYER_TRACE(__func__);
+
+ // Hold the lock to access layer-wide global store
+ std::unique_lock lock {g_vulkanLock};
+ auto* layer = Device::retrieve(commandBuffer);
+
+ registerDispatch(layer,
+ commandBuffer,
+ static_cast<int64_t>(groupCountX),
+ static_cast<int64_t>(groupCountY),
+ static_cast<int64_t>(groupCountZ));
+
+ // Release the lock to call into the driver
+ lock.unlock();
+ layer->driver.vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
+ emitCPUTrap(*layer, commandBuffer);
+}
+
+/* See Vulkan API for documentation.
 */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBase(VkCommandBuffer commandBuffer,
+ uint32_t baseGroupX,
+ uint32_t baseGroupY,
+ uint32_t baseGroupZ,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ LAYER_TRACE(__func__);
+
+ // Hold the lock to access layer-wide global store
+ std::unique_lock lock {g_vulkanLock};
+ auto* layer = Device::retrieve(commandBuffer);
+
+ registerDispatch(layer,
+ commandBuffer,
+ static_cast<int64_t>(groupCountX),
+ static_cast<int64_t>(groupCountY),
+ static_cast<int64_t>(groupCountZ));
+
+ // Release the lock to call into the driver
+ lock.unlock();
+ layer->driver.vkCmdDispatchBase(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+ emitCPUTrap(*layer, commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchBaseKHR(VkCommandBuffer commandBuffer,
+ uint32_t baseGroupX,
+ uint32_t baseGroupY,
+ uint32_t baseGroupZ,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ LAYER_TRACE(__func__);
+
+ // Hold the lock to access layer-wide global store
+ std::unique_lock lock {g_vulkanLock};
+ auto* layer = Device::retrieve(commandBuffer);
+
+ registerDispatch(layer,
+ commandBuffer,
+ static_cast<int64_t>(groupCountX),
+ static_cast<int64_t>(groupCountY),
+ static_cast<int64_t>(groupCountZ));
+
+ // Release the lock to call into the driver
+ lock.unlock();
+ layer->driver.vkCmdDispatchBaseKHR(commandBuffer, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+ emitCPUTrap(*layer, commandBuffer);
+}
+
+/* See Vulkan API for documentation.
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdDispatchIndirect(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + registerDispatch(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdDispatchIndirect(commandBuffer, buffer, offset); + emitCPUTrap(*layer, commandBuffer); +} diff --git a/layer_gpu_profile/source/layer_device_functions_queue.cpp b/layer_gpu_profile/source/layer_device_functions_queue.cpp new file mode 100644 index 0000000..57c722d --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions_queue.cpp @@ -0,0 +1,255 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include "device.hpp"
+#include "device_utils.hpp"
+#include "submit_visitor.hpp"
+
+#include "framework/device_dispatch_table.hpp"
+#include "trackers/queue.hpp"
+
+#include <nlohmann/json.hpp>
+
+#include <mutex>
+#include <string>
+
+using json = nlohmann::json;
+
+extern std::mutex g_vulkanLock;
+
+/**
+ * @brief Process the command buffer stream for events.
+ *
+ * @param layer The layer context.
+ * @param queue The queue being submitted to.
+ * @param commandBuffer The command buffer being submitted.
+ */
+static void processLayerCommandStream(Device& layer,
+ VkQueue queue,
+ VkCommandBuffer commandBuffer)
+{
+ // Fetch layer proxies for this workload
+ auto& tracker = layer.getStateTracker();
+ auto& trackQueue = tracker.getQueue(queue);
+ auto& trackCB = tracker.getCommandBuffer(commandBuffer);
+
+ // Play the layer command stream
+ ProfileSubmitVisitor workloadVisitor(layer);
+
+ const auto& cbLCS = trackCB.getSubmitCommandStream();
+ trackQueue.runSubmitCommandStream(cbLCS, workloadVisitor);
+}
+
+/* See Vulkan API for documentation.
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(queue); + + auto& tracker = layer->getStateTracker(); + tracker.queuePresent(); + + // End the previous frame if it was "of interest" + if (layer->isFrameOfInterest) + { + json endFrameMessage { + { "type", "end_frame" } + }; + + layer->txMessage(endFrameMessage.dump()); + } + + uint64_t frameID = tracker.totalStats.getFrameCount(); + layer->isFrameOfInterest = layer->instance->config.isFrameOfInterest(frameID); + + // Start the next frame if it is "of interest" + if (layer->isFrameOfInterest) + { + json startFrameMessage { + { "type", "start_frame" }, + { "frame", frameID }, + }; + + layer->txMessage(startFrameMessage.dump()); + } + + // If a "normal" frame then release the lock before calling in to the + // driver, otherwise keep the lock to stop other threads using Vulkan + // while we sync and reset the counter stream + if (!layer->isFrameOfInterest) + { + lock.unlock(); + } + + auto ret = layer->driver.vkQueuePresentKHR(queue, pPresentInfo); + + // If we are measuring performance ensure the previous frame has finished + // and then take an initial sample to reset the counters + if (layer->isFrameOfInterest) + { + layer->driver.vkDeviceWaitIdle(layer->device); + workaroundDelay(); + auto ec = layer->lgcSampler->sample_now(); + if (ec) + { + LAYER_ERR("Failed to make libGPUCounters GPU counter sample"); + } + } + + return ret; +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL + layer_vkQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(queue); + + // If a "normal" frame then release the lock before calling in to the + // driver, otherwise keep the lock to stop other threads using Vulkan + // while we sync and reset the counter stream + if (!layer->isFrameOfInterest) + { + lock.unlock(); + } + + auto res = layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence); + if (res != VK_SUCCESS) + { + return res; + } + + // If we are measuring performance then run the layer command stream with + // the lock held to stop other submits perturbing the counter data + if (layer->isFrameOfInterest) + { + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferCount; j++) + { + VkCommandBuffer commandBuffer = submit.pCommandBuffers[j]; + processLayerCommandStream(*layer, queue, commandBuffer); + } + } + } + + return res; +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL + layer_vkQueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(queue); + + // If a "normal" frame then release the lock before calling in to the + // driver, otherwise keep the lock to stop other threads using Vulkan + // while we sync and reset the counter stream + if (!layer->isFrameOfInterest) + { + lock.unlock(); + } + + auto res = layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence); + if (res != VK_SUCCESS) + { + return res; + } + + // If we are measuring performance then run the layer command stream with + // the lock held to stop other submits perturbing the counter data + if (layer->isFrameOfInterest) + { + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer; + processLayerCommandStream(*layer, queue, commandBuffer); + } + } + } + + return res; +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL + layer_vkQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(queue); + + // If a "normal" frame then release the lock before calling in to the + // driver, otherwise keep the lock to stop other threads using Vulkan + // while we sync and reset the counter stream + if (!layer->isFrameOfInterest) + { + lock.unlock(); + } + + auto res = layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence); + if (res != VK_SUCCESS || !layer->isFrameOfInterest) + { + return res; + } + + // If we are measuring performance then run the layer command stream with + // the lock held to stop other submits perturbing the counter data + if (layer->isFrameOfInterest) + { + for (uint32_t i = 0; i < submitCount; i++) + { + const auto& submit = pSubmits[i]; + for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++) + { + VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer; + processLayerCommandStream(*layer, queue, commandBuffer); + } + } + } + + return res; +} diff --git a/layer_gpu_profile/source/layer_device_functions_render_pass.cpp b/layer_gpu_profile/source/layer_device_functions_render_pass.cpp new file mode 100644 index 0000000..ff9b87a --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions_render_pass.cpp @@ -0,0 +1,424 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include "device.hpp" +#include "device_utils.hpp" +#include "framework/device_dispatch_table.hpp" +#include "framework/utils.hpp" +#include "trackers/render_pass.hpp" + +#include + +extern std::mutex g_vulkanLock; + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass(VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2(VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateRenderPass2KHR(VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(device); + + // Release the lock to call into the driver + lock.unlock(); + VkResult ret = layer->driver.vkCreateRenderPass2KHR(device, pCreateInfo, pAllocator, pRenderPass); + if (ret != VK_SUCCESS) + { + return ret; + } + + // Retake the lock to access layer-wide global store + lock.lock(); + auto& tracker = layer->getStateTracker(); + tracker.createRenderPass(*pRenderPass, *pCreateInfo); + return VK_SUCCESS; +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkDestroyRenderPass(VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(device); + + auto& tracker = layer->getStateTracker(); + tracker.destroyRenderPass(renderPass); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkDestroyRenderPass(device, renderPass, pAllocator); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + if (layer->isFrameOfInterest) + { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + cb.renderPassBegin(rp, width, height); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + if (layer->isFrameOfInterest) + { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + cb.renderPassBegin(rp, width, height); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdBeginRenderPass2(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + if (layer->isFrameOfInterest) + { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + auto& rp = tracker.getRenderPass(pRenderPassBegin->renderPass); + uint32_t width = pRenderPassBegin->renderArea.extent.width; + uint32_t height = pRenderPassBegin->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + cb.renderPassBegin(rp, width, height); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdBeginRenderPass2KHR(commandBuffer, pRenderPassBegin, pSubpassBeginInfo); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRendering(VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + if (layer->isFrameOfInterest) + { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... 
+ Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + cb.renderPassBegin(rp, width, height, resuming, suspending); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdBeginRendering(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginRenderingKHR(VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + if (layer->isFrameOfInterest) + { + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + + bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT; + bool suspending = pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT; + + // Extract metadata for later use ... + Tracker::RenderPass rp(*pRenderingInfo); + uint32_t width = pRenderingInfo->renderArea.extent.width; + uint32_t height = pRenderingInfo->renderArea.extent.height; + + // Notify the command buffer we are starting a new render pass + cb.renderPassBegin(rp, width, height, resuming, suspending); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdBeginRenderingKHR(commandBuffer, pRenderingInfo); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass(VkCommandBuffer commandBuffer) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + if (layer->isFrameOfInterest) + { + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.renderPassEnd(); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderPass(commandBuffer); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass2(VkCommandBuffer commandBuffer, + const VkSubpassEndInfo* pSubpassEndInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + if (layer->isFrameOfInterest) + { + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.renderPassEnd(); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderPass2(commandBuffer, pSubpassEndInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderPass2KHR(VkCommandBuffer commandBuffer, + const VkSubpassEndInfo* pSubpassEndInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + if (layer->isFrameOfInterest) + { + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.renderPassEnd(); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderPass2KHR(commandBuffer, pSubpassEndInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRendering(VkCommandBuffer commandBuffer) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + bool suspending {false}; + if (layer->isFrameOfInterest) + { + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + suspending = cb.renderPassEnd(); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRendering(commandBuffer); + if (!suspending) + { + emitCPUTrap(*layer, commandBuffer); + } +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndRenderingKHR(VkCommandBuffer commandBuffer) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + bool suspending {false}; + if (layer->isFrameOfInterest) + { + // Update the layer command stream in the tracker + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + suspending = cb.renderPassEnd(); + } + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdEndRenderingKHR(commandBuffer); + if (!suspending) + { + emitCPUTrap(*layer, commandBuffer); + } +} diff --git a/layer_gpu_profile/source/layer_device_functions_trace_rays.cpp b/layer_gpu_profile/source/layer_device_functions_trace_rays.cpp new file mode 100644 index 0000000..bacd68b --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions_trace_rays.cpp @@ -0,0 +1,220 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include "device.hpp" +#include "device_utils.hpp" +#include "framework/device_dispatch_table.hpp" + +#include +#include +#include + +extern std::mutex g_vulkanLock; + +/** + * @brief Register an acceleration structure build with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param buildType The build type. + * @param primitiveCount The number of primitives in the build. + */ +static void registerAccelerationStructureBuild(Device* layer, + VkCommandBuffer commandBuffer, + Tracker::LCSAccelerationStructureBuild::Type buildType, + int64_t primitiveCount) +{ + if (!layer->isFrameOfInterest) + { + return; + } + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.accelerationStructureBuild(buildType, primitiveCount); +} + +/** + * @brief Register a trace rays dispatch with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param itemsX The X size of the dispatch in work items. + * @param itemsY The Y size of the dispatch in work items. + * @param itemsZ The Z size of the dispatch in work items. + */ +static void registerTraceRays(Device* layer, + VkCommandBuffer commandBuffer, + int64_t itemsX, + int64_t itemsY, + int64_t itemsZ) +{ + if (!layer->isFrameOfInterest) + { + return; + } + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.traceRays(itemsX, itemsY, itemsZ); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresIndirectKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkDeviceAddress* pIndirectDeviceAddresses, + const uint32_t* pIndirectStrides, + const uint32_t* const* ppMaxPrimitiveCounts) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + registerAccelerationStructureBuild(layer, + commandBuffer, + Tracker::LCSAccelerationStructureBuild::Type::unknown, + -1); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdBuildAccelerationStructuresIndirectKHR(commandBuffer, + infoCount, + pInfos, + pIndirectDeviceAddresses, + pIndirectStrides, + ppMaxPrimitiveCounts); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + registerAccelerationStructureBuild(layer, + commandBuffer, + Tracker::LCSAccelerationStructureBuild::Type::unknown, + -1); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos, ppBuildRangeInfos); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, + VkDeviceAddress indirectDeviceAddress) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdTraceRaysIndirect2KHR(commandBuffer, indirectDeviceAddress); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + VkDeviceAddress indirectDeviceAddress) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + registerTraceRays(layer, commandBuffer, -1, -1, -1); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdTraceRaysIndirectKHR(commandBuffer, + pRaygenShaderBindingTable, + pMissShaderBindingTable, + pHitShaderBindingTable, + pCallableShaderBindingTable, + indirectDeviceAddress); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdTraceRaysKHR(VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + registerTraceRays(layer, commandBuffer, width, height, depth); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdTraceRaysKHR(commandBuffer, + pRaygenShaderBindingTable, + pMissShaderBindingTable, + pHitShaderBindingTable, + pCallableShaderBindingTable, + width, + height, + depth); + emitCPUTrap(*layer, commandBuffer); +} diff --git a/layer_gpu_profile/source/layer_device_functions_transfer.cpp b/layer_gpu_profile/source/layer_device_functions_transfer.cpp new file mode 100644 index 0000000..540a883 --- /dev/null +++ b/layer_gpu_profile/source/layer_device_functions_transfer.cpp @@ -0,0 +1,648 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#include "device.hpp" +#include "device_utils.hpp" +#include "framework/device_dispatch_table.hpp" +#include "trackers/layer_command_stream.hpp" + +#include +#include +#include + +extern std::mutex g_vulkanLock; + +/** + * @brief Register a transfer to a buffer with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param byteCount The number of bytes transferred. + */ +static void registerBufferTransfer(Device* layer, + VkCommandBuffer commandBuffer, + Tracker::LCSBufferTransfer::Type transferType, + int64_t byteCount) +{ + if (!layer->isFrameOfInterest) + { + return; + } + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.bufferTransfer(transferType, byteCount); +} + +/** + * @brief Register a transfer to an image with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param pixelCount The number of pixels transferred. 
+ */ +static void registerImageTransfer(Device* layer, + VkCommandBuffer commandBuffer, + Tracker::LCSImageTransfer::Type transferType, + int64_t pixelCount) +{ + if (!layer->isFrameOfInterest) + { + return; + } + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.imageTransfer(transferType, pixelCount); +} + +/** + * @brief Register a transfer to an image with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param byteCount The number of bytes transferred. + */ +static void registerAccelerationStructureTransfer(Device* layer, + VkCommandBuffer commandBuffer, + Tracker::LCSAccelerationStructureTransfer::Type transferType, + int64_t byteCount) +{ + if (!layer->isFrameOfInterest) + { + return; + } + + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + cb.accelerationStructureTransfer(transferType, byteCount); +} + +// Commands for transfers + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add buffer tracking so we can turn VK_WHOLE_SIZE into bytes + int64_t byteCount = static_cast(size); + if (size == VK_WHOLE_SIZE) + { + byteCount = -2; + } + + registerBufferTransfer(layer, commandBuffer, Tracker::LCSBufferTransfer::Type::fill_buffer, byteCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage(VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::clear_image, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdClearColorImage(commandBuffer, image, imageLayout, pColor, rangeCount, pRanges); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage(VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + // TODO: Add image tracking so we can turn image and pRanges into pixels + int64_t pixelCount = -1; + + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::clear_image, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdClearDepthStencilImage(commandBuffer, image, imageLayout, pDepthStencil, rangeCount, pRanges); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + byteCount += static_cast(pRegions[i].size); + } + + registerBufferTransfer(layer, commandBuffer, Tracker::LCSBufferTransfer::Type::copy_buffer, byteCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, pRegions); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast(pCopyBufferInfo->pRegions[i].size); + } + + registerBufferTransfer(layer, commandBuffer, Tracker::LCSBufferTransfer::Type::copy_buffer, byteCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyBuffer2(commandBuffer, pCopyBufferInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t byteCount = 0; + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) + { + byteCount += static_cast(pCopyBufferInfo->pRegions[i].size); + } + + registerBufferTransfer(layer, commandBuffer, Tracker::LCSBufferTransfer::Type::copy_buffer, byteCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyBuffer2KHR(commandBuffer, pCopyBufferInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].imageExtent.width) + * static_cast(pRegions[i].imageExtent.height) + * static_cast(pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_buffer_to_image, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyBufferToImage(commandBuffer, srcBuffer, dstImage, dstImageLayout, regionCount, pRegions); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyBufferToImage2(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_buffer_to_image, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyBufferToImage2(commandBuffer, pCopyBufferToImageInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyBufferToImageInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_buffer_to_image, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyBufferToImage2KHR(commandBuffer, pCopyBufferToImageInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].extent.width) + * static_cast(pRegions[i].extent.height) + * static_cast(pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageInfo->pRegions[i].extent.width) + * static_cast(pCopyImageInfo->pRegions[i].extent.height) + * static_cast(pCopyImageInfo->pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyImage2(commandBuffer, pCopyImageInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageInfo->pRegions[i].extent.width) + * static_cast(pCopyImageInfo->pRegions[i].extent.height) + * static_cast(pCopyImageInfo->pRegions[i].extent.depth); + pixelCount += rPixelCount; + } + + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyImage2KHR(commandBuffer, pCopyImageInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < regionCount; i++) + { + int64_t rPixelCount = static_cast(pRegions[i].imageExtent.width) + * static_cast(pRegions[i].imageExtent.height) + * static_cast(pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // as "Copy image" and report size in pixels. + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image_to_buffer, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyImageToBuffer(commandBuffer, srcImage, srcImageLayout, dstBuffer, regionCount, pRegions); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // as "Copy image" and report size in pixels. + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image_to_buffer, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyImageToBuffer2(commandBuffer, pCopyImageToBufferInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // Compute the size of the transfer + int64_t pixelCount = 0; + for (uint32_t i = 0; i < pCopyImageToBufferInfo->regionCount; i++) + { + int64_t rPixelCount = static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.width) + * static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.height) + * static_cast(pCopyImageToBufferInfo->pRegions[i].imageExtent.depth); + pixelCount += rPixelCount; + } + + // TODO: Our usual convention is to mark the transfer using the destination + // type, which means this should be a bufferTransfer reporting size in + // bytes. Without image tracking we only have pixels, so for now we report + // as "Copy image" and report size in pixels. + registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image_to_buffer, pixelCount); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureInfoKHR* pInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // TODO: We ideally want to track sizes of the transfers, but this requires + // dispatching vkCmdWriteAccelerationStructuresPropertiesKHR() queries and + // capturing the result "later" which we don't support yet. 
+ // We can approximate the size using vkGetAccelerationStructureBuildSizesKHR(), + // but this returns the build size which may be larger than the size of the + // AS itself which can be smaller (especially if later compacted). + registerAccelerationStructureTransfer(layer, + commandBuffer, + Tracker::LCSAccelerationStructureTransfer::Type::struct_to_struct, + -1); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyAccelerationStructureKHR(commandBuffer, pInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // TODO: We ideally want to track sizes of the transfers, but this requires + // dispatching vkCmdWriteAccelerationStructuresPropertiesKHR() queries and + // capturing the result "later" which we don't support yet. + // We can approximate the size using vkGetAccelerationStructureBuildSizesKHR(), + // but this returns the build size which may be larger than the size of the + // AS itself which can be smaller (especially if later compacted). + registerAccelerationStructureTransfer(layer, + commandBuffer, + Tracker::LCSAccelerationStructureTransfer::Type::struct_to_mem, + -1); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyAccelerationStructureToMemoryKHR(commandBuffer, pInfo); + emitCPUTrap(*layer, commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + // TODO: We ideally want to track sizes of the transfers, but this requires + // dispatching vkCmdWriteAccelerationStructuresPropertiesKHR() queries and + // capturing the result "later" which we don't support yet. + // We can approximate the size using vkGetAccelerationStructureBuildSizesKHR(), + // but this returns the build size which may be larger than the size of the + // AS itself which can be smaller (especially if later compacted). + registerAccelerationStructureTransfer(layer, + commandBuffer, + Tracker::LCSAccelerationStructureTransfer::Type::mem_to_struct, + -1); + + // Release the lock to call into the driver + lock.unlock(); + layer->driver.vkCmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo); + emitCPUTrap(*layer, commandBuffer); +} diff --git a/layer_gpu_profile/source/submit_visitor.cpp b/layer_gpu_profile/source/submit_visitor.cpp new file mode 100644 index 0000000..56d7b9c --- /dev/null +++ b/layer_gpu_profile/source/submit_visitor.cpp @@ -0,0 +1,174 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ +#include "device_utils.hpp" +#include "submit_visitor.hpp" +#include "framework/utils.hpp" + +#include + +#include + +using json = nlohmann::json; + +/* See header for documentation */ +void ProfileSubmitVisitor::handleCPUTrap( + const std::string& workloadType, + const std::vector& debugStack +) { + json message { + { "type", workloadType }, + { "labels", debugStack }, + { "counters", json::array() } + }; + + waitForGPU(device); + + auto ec = device.lgcSampler->sample_now(); + + notifyGPU(device); + + if (ec) + { + LAYER_ERR("Failed to make libGPUCounters GPU counter sample"); + } + else + { + for (const auto& pair : device.lgcActiveCounters) + { + hwcpipe::counter_sample sample; + ec = device.lgcSampler->get_counter_value(pair.first, sample); + if (ec) + { + LAYER_ERR("Failed to get libGPUCounters GPU counter value"); + continue; + } + + if (sample.type == hwcpipe::counter_sample::type::uint64) + { + json counter { + { pair.second, sample.value.uint64 }, + }; + + message["counters"].push_back(counter); + + } + else + { + json counter { + { pair.second, sample.value.float64 }, + }; + + message["counters"].push_back(counter); + + } + } + } + + device.txMessage(message.dump()); + +} + +/* See header for documentation */ +void ProfileSubmitVisitor::operator()( + const 
Tracker::LCSRenderPass& renderPass, + const std::vector& debugStack +) { + UNUSED(renderPass); + + handleCPUTrap("renderpass", debugStack); +} + +/* See header for documentation */ +void ProfileSubmitVisitor::operator()( + const Tracker::LCSRenderPassContinuation& continuation, + const std::vector& debugStack, + uint64_t renderPassTagID +) { + UNUSED(continuation); + UNUSED(debugStack); + UNUSED(renderPassTagID); + + // Ignore continuations because we only trigger one trap per render pass +} + +/* See header for documentation */ +void ProfileSubmitVisitor::operator()( + const Tracker::LCSDispatch& dispatch, + const std::vector& debugStack +) { + UNUSED(dispatch); + + handleCPUTrap("compute", debugStack); +} + +/* See header for documentation */ +void ProfileSubmitVisitor::operator()( + const Tracker::LCSTraceRays& traceRays, + const std::vector& debugStack +) { + UNUSED(traceRays); + + handleCPUTrap("tracerays", debugStack); +} + +/* See header for documentation */ +void ProfileSubmitVisitor::operator()( + const Tracker::LCSImageTransfer& imageTransfer, + const std::vector& debugStack +) { + UNUSED(imageTransfer); + + handleCPUTrap("image_transfer", debugStack); +} + +/* See header for documentation */ +void ProfileSubmitVisitor::operator()( + const Tracker::LCSBufferTransfer& bufferTransfer, + const std::vector& debugStack +) { + UNUSED(bufferTransfer); + + handleCPUTrap("buffer_transfer", debugStack); +} + +/* See header for documentation */ +void ProfileSubmitVisitor::operator()( + const Tracker::LCSAccelerationStructureBuild& asBuild, + const std::vector& debugStack +) { + UNUSED(asBuild); + + handleCPUTrap("as_build", debugStack); +} + +/* See header for documentation */ +void ProfileSubmitVisitor::operator()( + const Tracker::LCSAccelerationStructureTransfer& asTransfer, + const std::vector& debugStack +) { + UNUSED(asTransfer); + + handleCPUTrap("as_transfer", debugStack); +} diff --git a/layer_gpu_profile/source/submit_visitor.hpp 
b/layer_gpu_profile/source/submit_visitor.hpp new file mode 100644 index 0000000..90ac752 --- /dev/null +++ b/layer_gpu_profile/source/submit_visitor.hpp @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#include "device.hpp" +#include "trackers/layer_command_stream.hpp" +#include "trackers/queue.hpp" + +#include +#include + +/** + * Handles setting up async state ... + */ +class ProfileSubmitVisitor : public Tracker::SubmitCommandWorkloadVisitor +{ +public: + /** + * Construct a profile workload visitor for a layer command stream. + * + * @param _device The device object for the command stream. 
+ */ + ProfileSubmitVisitor(Device& _device) + : device(_device) + { + } + + // Visitor should not be copied or moved from + ProfileSubmitVisitor(const ProfileSubmitVisitor&) = delete; + ProfileSubmitVisitor(ProfileSubmitVisitor&&) noexcept = delete; + ProfileSubmitVisitor& operator=(const ProfileSubmitVisitor&) = delete; + ProfileSubmitVisitor& operator=(ProfileSubmitVisitor&&) noexcept = delete; + + // Methods from the visitor interface + void operator()( + const Tracker::LCSRenderPass& renderPass, + const std::vector& debugStack) override; + + void operator()( + const Tracker::LCSRenderPassContinuation& continuation, + const std::vector& debugStack, + uint64_t renderPassTagID) override; + + void operator()( + const Tracker::LCSDispatch& dispatch, + const std::vector& debugStack) override; + + void operator()( + const Tracker::LCSTraceRays& traceRays, + const std::vector& debugStack) override; + + void operator()( + const Tracker::LCSImageTransfer& imageTransfer, + const std::vector& debugStack) override; + + void operator()( + const Tracker::LCSBufferTransfer& bufferTransfer, + const std::vector& debugStack) override; + + void operator()( + const Tracker::LCSAccelerationStructureBuild& asBuild, + const std::vector& debugStack) override; + + void operator()( + const Tracker::LCSAccelerationStructureTransfer& asTransfer, + const std::vector& debugStack) override; + +private: + /** + * @brief Handle the CPU-side of the counter sampling sequence. + * + * @param workloadType The coarse type of the workload. + * @param debugStack The user debug label stack. 
+ */ + void handleCPUTrap( + const std::string& workloadType, + const std::vector& debugStack); + +private: + Device& device; +}; + diff --git a/layer_gpu_profile/source/version.hpp.in b/layer_gpu_profile/source/version.hpp.in new file mode 100644 index 0000000..ff3777f --- /dev/null +++ b/layer_gpu_profile/source/version.hpp.in @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: MIT + * ---------------------------------------------------------------------------- + * Copyright (c) 2024-2025 Arm Limited + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * ---------------------------------------------------------------------------- + */ + +/** + * @file Placeholder templates that are populated by CMake during configure. 
+ */ + +#pragma once + +#define LGL_VER_MAJOR @PROJECT_VERSION_MAJOR@ +#define LGL_VER_MINOR @PROJECT_VERSION_MINOR@ +#define LGL_VER_PATCH @PROJECT_VERSION_PATCH@ +#define LGL_LAYER_NAME "@LGL_LAYER_NAME_STR@" +#define LGL_LAYER_DESC "@LGL_LAYER_DESC_STR@" + +#define LGL_LAYER_CONFIG "@LGL_LAYER_NAME_STR@.json" diff --git a/layer_gpu_support/README_LAYER.md b/layer_gpu_support/README_LAYER.md index ccf22ca..4d60ead 100644 --- a/layer_gpu_support/README_LAYER.md +++ b/layer_gpu_support/README_LAYER.md @@ -43,10 +43,10 @@ sections in the [Build documentation](../docs/building.md). ### Running using the layer -You can perform support experiments by using the Android helper utility found -in the root directory to configure the layer and manage the application. You -must enable the support layer, and provide a configuration file to parameterize -it. +You can configure a device to run support experiments by using the Android +helper utility found in the root directory to configure the layer and manage +the application. You must enable the support layer, and provide a configuration +file to parameterize it. 
```sh python3 lgl_android_install.py --layer layer_gpu_support --config diff --git a/layer_gpu_support/source/layer_config.cpp b/layer_gpu_support/source/layer_config.cpp index b150d02..a66c1c3 100644 --- a/layer_gpu_support/source/layer_config.cpp +++ b/layer_gpu_support/source/layer_config.cpp @@ -279,7 +279,6 @@ LayerConfig::LayerConfig() LAYER_ERR("Error: %s", e.what()); } - try { parse_serialization_options(data); diff --git a/layer_gpu_timeline/source/layer_device_functions_debug.cpp b/layer_gpu_timeline/source/layer_device_functions_debug.cpp index 7795598..37b67e2 100644 --- a/layer_gpu_timeline/source/layer_device_functions_debug.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_debug.cpp @@ -1,7 +1,7 @@ /* * SPDX-License-Identifier: MIT * ---------------------------------------------------------------------------- - * Copyright (c) 2024 Arm Limited + * Copyright (c) 2024-2025 Arm Limited * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -44,7 +44,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerBeginEXT(VkCommandBuf auto& tracker = layer->getStateTracker(); auto& cb = tracker.getCommandBuffer(commandBuffer); - // Increment the render pass counter in the tracker + // Push the label scope to the tracker cb.debugMarkerBegin(pMarkerInfo->pMarkerName); // Note that we do not call the driver for user labels - they are @@ -65,7 +65,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdDebugMarkerEndEXT(VkCommandBuffe auto& tracker = layer->getStateTracker(); auto& cb = tracker.getCommandBuffer(commandBuffer); - // Increment the render pass counter in the tracker + // Pop the label scope in the tracker cb.debugMarkerEnd(); // Note that we do not call the driver for user labels - they are @@ -87,7 +87,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdBeginDebugUtilsLabelEXT(VkComman auto& tracker = layer->getStateTracker(); auto& cb = 
tracker.getCommandBuffer(commandBuffer); - // Increment the render pass counter in the tracker + // Push the label scope to the tracker cb.debugMarkerBegin(pLabelInfo->pLabelName); // Note that we do not call the driver for user labels - they are @@ -108,7 +108,7 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdEndDebugUtilsLabelEXT(VkCommandB auto& tracker = layer->getStateTracker(); auto& cb = tracker.getCommandBuffer(commandBuffer); - // Increment the render pass counter in the tracker + // Pop the label scope in the tracker cb.debugMarkerEnd(); // Note that we do not call the driver for user labels - they are diff --git a/layer_gpu_timeline/source/timeline_comms.hpp b/layer_gpu_timeline/source/timeline_comms.hpp index 15bc518..fecd9eb 100644 --- a/layer_gpu_timeline/source/timeline_comms.hpp +++ b/layer_gpu_timeline/source/timeline_comms.hpp @@ -1,7 +1,7 @@ /* * SPDX-License-Identifier: MIT * ---------------------------------------------------------------------------- - * Copyright (c) 2024 Arm Limited + * Copyright (c) 2024-2025 Arm Limited * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -33,11 +33,6 @@ /** * @brief A simple message encoder for the timeline comms endpoint. - * - * TODO: This is currently a very simple implementation because we are simply - * passing JSON strings around. This is not the most efficient way of doing - * this and in future this module will be used to implement binary encoders - * for each specific message type that needs sending. 
*/ class TimelineComms { diff --git a/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp b/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp index 2d5bdfd..bd8bf76 100644 --- a/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp +++ b/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp @@ -87,7 +87,7 @@ using Submit = pp::message< /* The VkQueue the frame belongs to */ pp::uint64_field<"queue", 3>>; -/* Enumerates the possible attachment types a renderpass can have */ +/* Enumerates the possible attachment types a render pass can have */ enum class RenderpassAttachmentType { undefined = 0, @@ -96,7 +96,7 @@ enum class RenderpassAttachmentType stencil = 3, }; -/* Describe an attachment to a renderpass */ +/* Describe an attachment to a render pass */ using RenderpassAttachment = pp::message< /* The attachment type */ pp::enum_field<"type", 1, RenderpassAttachmentType>, @@ -112,29 +112,29 @@ using RenderpassAttachment = pp::message< things are not resolved, so this saves a field in the data) */ pp::bool_field<"resolved", 5>>; -/* Start a new renderpass */ +/* Start a new render pass */ using BeginRenderpass = pp::message< - /* The unique identifier for this new renderpass */ + /* The unique identifier for this new render pass */ pp::uint64_field<"tag_id", 1>, - /* The dimensions of the renderpass' attachments */ + /* The dimensions of the render pass' attachments */ pp::uint32_field<"width", 2>, pp::uint32_field<"height", 3>, - /* The number of drawcalls in the renderpass */ + /* The number of drawcalls in the render pass */ pp::uint32_field<"draw_call_count", 4>, /* The subpass count */ pp::uint32_field<"subpass_count", 5>, - /* Any user defined debug labels associated with the renderpass */ + /* Any user defined debug labels associated with the render pass */ pp::string_field<"debug_label", 6, pp::repeated>, - /* Any attachments associated with the renderpass */ + /* Any attachments associated with the render pass */ 
pp::message_field<"attachments", 7, RenderpassAttachment, pp::repeated>>; -/* Continue a split renderpass */ +/* Continue a split render pass */ using ContinueRenderpass = pp::message< - /* The unique identifier for the renderpass that is being continued */ + /* The unique identifier for the render pass that is being continued */ pp::uint64_field<"tag_id", 1>, - /* The number of drawcalls to add to the total in the renderpass */ + /* The number of drawcalls to add to the total in the render pass */ pp::uint32_field<"draw_call_count", 2>, - /* Any user defined debug labels to add to the renderpass */ + /* Any user defined debug labels to add to the render pass */ pp::string_field<"debug_label", 3, pp::repeated>>; /* A dispatch object submission */ @@ -295,7 +295,7 @@ Comms::MessageData packBuffer(pp::constant c, T&& f) * @return A pair, where the first value is the corresponding attachment type, and the second value is * the corresponding attachment index (or nullopt in the case the index is not relevant). */ -constexpr std::pair> mapRenderpassAttachmentName( +constexpr std::pair> mapRenderPassAttachmentName( Tracker::RenderPassAttachName name) { switch (name) @@ -443,10 +443,10 @@ constexpr AccelerationStructureTransferType mapASTransferType(Tracker::LCSAccele /** * @brief Serialize the metadata for this render pass workload. * - * @param renderpass The render pass to serialize + * @param renderPass The render pass to serialize * @param debugLabel The debug label stack of the VkQueue at submit time. 
*/ -Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std::vector& debugLabel) +Comms::MessageData serialize(const Tracker::LCSRenderPass& renderPass, const std::vector& debugLabel) { using namespace pp; @@ -454,18 +454,18 @@ Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std // associated with a single tagID if restartable across command buffer // boundaries because different command buffer submit combinations can // result in different draw counts for the same starting tagID. - const auto drawCount = (!renderpass.isOneTimeSubmit() && renderpass.isSuspending() + const auto drawCount = (!renderPass.isOneTimeSubmit() && renderPass.isSuspending() ? -1 - : static_cast(renderpass.getDrawCallCount())); + : static_cast(renderPass.getDrawCallCount())); // Make the attachments array - const auto& attachments = renderpass.getAttachments(); + const auto& attachments = renderPass.getAttachments(); std::vector attachmentsMsg {}; attachmentsMsg.reserve(attachments.size()); for (const auto& attachment : attachments) { - const auto [type, index] = mapRenderpassAttachmentName(attachment.getAttachmentName()); + const auto [type, index] = mapRenderPassAttachmentName(attachment.getAttachmentName()); attachmentsMsg.emplace_back(type, index, @@ -479,11 +479,11 @@ Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std return packBuffer("renderpass"_f, BeginRenderpass { - renderpass.getTagID(), - renderpass.getWidth(), - renderpass.getHeight(), + renderPass.getTagID(), + renderPass.getWidth(), + renderPass.getHeight(), drawCount, - renderpass.getSubpassCount(), + renderPass.getSubpassCount(), debugLabel, std::move(attachmentsMsg), }); @@ -492,7 +492,7 @@ Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std /** * @brief Serialize the metadata for this render pass continuation workload. 
* - * @param continuation The renderpass continuation to serialize + * @param continuation The render pass continuation to serialize * @param tagIDContinuation The ID of the workload if this is a continuation of it. */ Comms::MessageData serialize(const Tracker::LCSRenderPassContinuation& continuation, uint64_t tagIDContinuation) @@ -681,19 +681,19 @@ void TimelineProtobufEncoder::emitSubmit(VkQueue queue, uint64_t timestamp) })); } -void TimelineProtobufEncoder::operator()(const Tracker::LCSRenderPass& renderpass, +void TimelineProtobufEncoder::operator()(const Tracker::LCSRenderPass& renderPass, const std::vector& debugStack) { - device.txMessage(serialize(renderpass, debugStack)); + device.txMessage(serialize(renderPass, debugStack)); } void TimelineProtobufEncoder::operator()(const Tracker::LCSRenderPassContinuation& continuation, const std::vector& debugStack, - uint64_t renderpassTagID) + uint64_t renderPassTagID) { UNUSED(debugStack); - device.txMessage(serialize(continuation, renderpassTagID)); + device.txMessage(serialize(continuation, renderPassTagID)); } void TimelineProtobufEncoder::operator()(const Tracker::LCSDispatch& dispatch, diff --git a/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp b/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp index 34e6e4b..9b5b3c3 100644 --- a/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp +++ b/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp @@ -90,7 +90,7 @@ class TimelineProtobufEncoder : public Tracker::SubmitCommandWorkloadVisitor static void emitFrame(Device& device, uint64_t frameNumber, uint64_t timestamp); /** - * Construct a new workload metadata emitter that will output paylaods for the provided device + * Construct a new workload metadata emitter that will output payloads for the provided device * * @param _device The device object that the payloads are produced for, and to which they are passed for * transmission @@ -100,17 +100,17 @@ class TimelineProtobufEncoder : public 
Tracker::SubmitCommandWorkloadVisitor { } - // visitor should not be copied or moved from + // Visitor should not be copied or moved from TimelineProtobufEncoder(const TimelineProtobufEncoder&) = delete; TimelineProtobufEncoder(TimelineProtobufEncoder&&) noexcept = delete; TimelineProtobufEncoder& operator=(const TimelineProtobufEncoder&) = delete; TimelineProtobufEncoder& operator=(TimelineProtobufEncoder&&) noexcept = delete; - // methods from the visitor interface - void operator()(const Tracker::LCSRenderPass& renderpass, const std::vector& debugStack) override; + // Methods from the visitor interface + void operator()(const Tracker::LCSRenderPass& renderPass, const std::vector& debugStack) override; void operator()(const Tracker::LCSRenderPassContinuation& continuation, const std::vector& debugStack, - uint64_t renderpassTagID) override; + uint64_t renderPassTagID) override; void operator()(const Tracker::LCSDispatch& dispatch, const std::vector& debugStack) override; void operator()(const Tracker::LCSTraceRays& traceRays, const std::vector& debugStack) override; void operator()(const Tracker::LCSImageTransfer& imageTransfer, diff --git a/lgl_android_install.py b/lgl_android_install.py index c4c9738..8e6cc9c 100755 --- a/lgl_android_install.py +++ b/lgl_android_install.py @@ -135,6 +135,7 @@ from lglpy.android.filesystem import AndroidFilesystem from lglpy.comms import server from lglpy.comms import service_gpu_timeline +from lglpy.comms import service_gpu_profile from lglpy.ui import console # Android 9 is the minimum version supported for our method of enabling layers @@ -592,7 +593,9 @@ def cleanup(child_process): print('WARNING: Cannot enable logcat recording') -def configure_server(conn: ADBConnect, output_path: str) -> None: +def configure_server(conn: ADBConnect, + timeline_file: Optional[str], + profile_dir: Optional[str]) -> None: ''' Configure the remote server to collect data. 
@@ -601,13 +604,20 @@ def configure_server(conn: ADBConnect, output_path: str) -> None: ''' Configure the remote server to collect data. Args: conn: The adb connection. - output_path: The desired output file path. + timeline_file: The desired output file path for timeline. + profile_dir: The desired output directory path for profile data. Existing + files in the directory may be overwritten. ''' # Create a server instance instance = server.CommsServer(0) - service = service_gpu_timeline.GPUTimelineService(output_path) - instance.register_endpoint(service) + if timeline_file: + serviceTL = service_gpu_timeline.GPUTimelineService(timeline_file) + instance.register_endpoint(serviceTL) + + if profile_dir: + serviceProf = service_gpu_profile.GPUProfileService(profile_dir) + instance.register_endpoint(serviceProf) # Start it running thread = threading.Thread(target=instance.run, daemon=True) @@ -784,6 +794,10 @@ def parse_cli() -> Optional[argparse.Namespace]: '--timeline-perfetto', type=str, default=None, help='save Timeline Perfetto trace to this file') + parser.add_argument( + '--profile', type=str, default=None, + help='save Profile data to this directory') + args = parser.parse_args() # Validate arguments @@ -886,9 +900,11 @@ def main() -> int: print(f' - {layer.name}') print() - # Enable Timeline - if args.timeline_metadata: - configure_server(conn, args.timeline_metadata) + # Enable communications server + if args.timeline_metadata or args.profile: + configure_server(conn, + args.timeline_metadata, + args.profile) # Enable logcat if args.logcat: @@ -918,7 +934,7 @@ def main() -> int: print(f'{message:<45}') else: - input('Press any key when finished to uninstall all layers') + input('Press any key when finished to uninstall all layers\n\n') print('\nUninstalling all layers') diff --git a/lglpy/comms/service_gpu_profile.py b/lglpy/comms/service_gpu_profile.py new file mode 100644 index 0000000..7c30e5e --- /dev/null +++ b/lglpy/comms/service_gpu_profile.py @@ -0,0 +1,197 @@ +# SPDX-License-Identifier: MIT +# 
----------------------------------------------------------------------------- +# Copyright (c) 2024-2025 Arm Limited +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the 'Software'), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ----------------------------------------------------------------------------- + +''' +This module implements the server-side communications module service that +handles record preprocessing and serializing the resulting data from the +GPU Profile layer. +''' + +import csv +import json +import os +from typing import Any, Optional, TypedDict, Union + +from lglpy.comms.server import Message + + +class StartFrameMessage(TypedDict): + ''' + Type information for a start_frame JSON message. + ''' + type: str + frame: int + + +class EndFrameMessage(TypedDict): + ''' + Type information for an end_frame JSON message. + ''' + type: str + + +class WorkloadMessage(TypedDict): + ''' + Type information for any workload JSON message. 
+ ''' + type: str + counters: list[dict[str, Union[int, float]]] + labels: list[str] + + +class GPUProfileService: + ''' + A service for handling network comms from the layer_gpu_profile layer. + ''' + + def __init__(self, dir_path: str, verbose: bool = False): + ''' + Initialize the profile service. + + Args: + dir_path: Directory to write on the filesystem + verbose: Should this use verbose logging? + ''' + self.base_dir = dir_path + + self.frame_id: Optional[int] = None + self.frame_header: Optional[list[str]] = None + self.frame_data: Optional[list[list[str]]] = None + + os.makedirs(dir_path, exist_ok=True) + + def get_service_name(self) -> str: + ''' + Get the service endpoint name. + + Returns: + The endpoint name. + ''' + return 'GPUProfile' + + def handle_start_frame(self, message: StartFrameMessage): + ''' + Handle a start_frame message. + + Args: + message: The decoded JSON. + ''' + self.frame_id = message["frame"] + self.frame_header = None + self.frame_data = [] + + def handle_end_frame(self, message: EndFrameMessage): + ''' + Handle an end_frame message. + + Args: + message: The decoded JSON. + ''' + # Message contains nothing we need + del message + + assert self.frame_id is not None + assert self.frame_header is not None + assert self.frame_data is not None + + # Emit the CSV file + print(f'Generating CSV for frame {self.frame_id}') + path = os.path.join(self.base_dir, f'frame_{self.frame_id:05d}.csv') + with open(path, 'w', newline='') as handle: + writer = csv.writer(handle) + writer.writerow(self.frame_header) + writer.writerows(self.frame_data) + + # Reset the state + self.frame_id = None + self.frame_header = None + self.frame_data = None + + def create_workload_header(self, message: WorkloadMessage): + ''' + Create a table header row from a workload. + + Args: + message: The decoded JSON. 
+ ''' + columns = [] + + columns.append('Index') + columns.append('Workload type') + for counter in message['counters']: + key = list(counter.keys())[0] + columns.append(key) + columns.append('Label') + + self.frame_header = columns + + def create_workload_data(self, message: WorkloadMessage): + ''' + Create a table data row from a workload. + + Args: + message: The decoded JSON. + ''' + assert self.frame_id is not None + assert self.frame_header is not None + assert self.frame_data is not None + + columns: list[str] = [] + + columns.append(str(len(self.frame_data))) + columns.append(message['type']) + + for counter in message['counters']: + value = list(counter.values())[0] + columns.append(f'{value:0.2f}') + columns.append('|'.join(message['labels'])) + + self.frame_data.append(columns) + + def handle_workload(self, message: WorkloadMessage): + ''' + Handle a workload message. + + Args: + message: The decoded JSON. + ''' + if not self.frame_header: + self.create_workload_header(message) + + self.create_workload_data(message) + + def handle_message(self, message: Message) -> None: + ''' + Handle a service request from a layer. + + Note that this service only expects pushed TX or TX_ASYNC messages, so + never provides a response. 
+ ''' + encoded_payload = message.payload.decode('utf-8') + payload = json.loads(encoded_payload) + + if payload['type'] == 'start_frame': + self.handle_start_frame(payload) + elif payload['type'] == 'end_frame': + self.handle_end_frame(payload) + else: + self.handle_workload(payload) diff --git a/source_common/comms/comms_interface.hpp b/source_common/comms/comms_interface.hpp index 20a237b..d21776e 100644 --- a/source_common/comms/comms_interface.hpp +++ b/source_common/comms/comms_interface.hpp @@ -1,7 +1,7 @@ /* * SPDX-License-Identifier: MIT * ---------------------------------------------------------------------------- - * Copyright (c) 2024 Arm Limited + * Copyright (c) 2024-2025 Arm Limited * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -63,7 +63,7 @@ static const EndpointID NO_ENDPOINT {0}; class CommsInterface { public: - virtual ~CommsInterface() { } + virtual ~CommsInterface() = default; /** * @brief Is this comms module connected to a host server? 
diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp index 022eac9..763dded 100644 --- a/source_common/trackers/queue.cpp +++ b/source_common/trackers/queue.cpp @@ -80,7 +80,7 @@ namespace } /** - * @brief Visit a renderpass workload instruction + * @brief Visit a render pass workload instruction * * @param instruction The workload instruction */ @@ -99,7 +99,7 @@ namespace } /** - * @brief Visit a renderpass continuation workload instruction + * @brief Visit a render pass continuation workload instruction * * @param instruction The workload instruction */ diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp index 2116964..6d8bed3 100644 --- a/source_common/trackers/queue.hpp +++ b/source_common/trackers/queue.hpp @@ -62,23 +62,23 @@ class SubmitCommandWorkloadVisitor virtual ~SubmitCommandWorkloadVisitor() noexcept = default; /** - * @brief Visit a renderpass workload object + * @brief Visit a render pass workload object * - * @param renderpass The renderpass - * @param debugStack The stack of debug labels that are associated with this renderpass + * @param renderPass The render pass + * @param debugStack The stack of debug labels that are associated with this render pass */ - virtual void operator()(const LCSRenderPass& renderpass, const std::vector& debugStack) = 0; + virtual void operator()(const LCSRenderPass& renderPass, const std::vector& debugStack) = 0; /** - * @brief Visit a renderpass continuation workload object + * @brief Visit a render pass continuation workload object * - * @param continuation The renderpass continuation - * @param debugStack The stack of debug labels that are associated with this renderpass - * @param renderpassTagID The renderpass tag that the continuation was associated with + * @param continuation The render pass continuation + * @param debugStack The stack of debug labels that are associated with this render pass + * @param renderPassTagID The render pass tag that the 
continuation was associated with */ virtual void operator()(const LCSRenderPassContinuation& continuation, const std::vector& debugStack, - uint64_t renderpassTagID) = 0; + uint64_t renderPassTagID) = 0; /** * @brief Visit a dispatch workload object diff --git a/source_common/utils/queue.hpp b/source_common/utils/queue.hpp index 4434837..373f68d 100644 --- a/source_common/utils/queue.hpp +++ b/source_common/utils/queue.hpp @@ -1,7 +1,7 @@ /* * SPDX-License-Identifier: MIT * ---------------------------------------------------------------------------- - * Copyright (c) 2024 Arm Limited + * Copyright (c) 2024-2025 Arm Limited * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -44,12 +44,13 @@ class Task /** * @brief Destroy the task. */ - virtual ~Task() { } + virtual ~Task() = default; /** * @brief Wait for the task to be complete. */ - void wait() { + void wait() + { std::unique_lock lock(condition_lock); complete_condition.wait(lock, [this]{ return complete.load(); }); } @@ -57,7 +58,8 @@ class Task /** * @brief Notify that the task is complete. */ - void notify() { + void notify() + { std::unique_lock lock(condition_lock); complete = true; lock.unlock(); diff --git a/source_third_party/libGPUCounters b/source_third_party/libGPUCounters new file mode 160000 index 0000000..f60cfa8 --- /dev/null +++ b/source_third_party/libGPUCounters @@ -0,0 +1 @@ +Subproject commit f60cfa830c85ffff09c70318e573a4672ab590c9