Skip to content

Commit 02f5b26

Browse files
authored
Profile layer: Add layer_gpu_profile (#132)
Adds the first release of a new off-the-shelf layer for measuring per workload performance counters on systems with an Arm GPU. See layer readme documentation for details.
1 parent 3f06893 commit 02f5b26

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+4824
-74
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@
1010
[submodule "source_third_party/khronos/vulkan-utilities"]
1111
path = source_third_party/khronos/vulkan-utilities
1212
url = https://github.com/KhronosGroup/Vulkan-Utility-Libraries/
13+
[submodule "source_third_party/libGPUCounters"]
14+
path = source_third_party/libGPUCounters
15+
url = https://github.com/ARM-software/libGPUCounters.git

layer_gpu_profile/CMakeLists.txt

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# SPDX-License-Identifier: MIT
2+
# -----------------------------------------------------------------------------
3+
# Copyright (c) 2024-2025 Arm Limited
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to
7+
# deal in the Software without restriction, including without limitation the
8+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9+
# sell copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
# -----------------------------------------------------------------------------
23+
24+
cmake_minimum_required(VERSION 3.19)
25+
26+
set(CMAKE_CXX_STANDARD 20)
27+
# Make C++20 a hard requirement so CMake errors instead of silently falling back
set(CMAKE_CXX_STANDARD_REQUIRED ON)
28+
29+
project(VkLayerGPUProfile VERSION 1.0.0)
30+
31+
# Common configuration
32+
set(LGL_LOG_TAG "VkLayerGPUProfile")
33+
set(LGL_CONFIG_TRACE 0)
34+
set(LGL_CONFIG_LOG 1)
35+
36+
include(../source_common/compiler_helper.cmake)
37+
include(../cmake/clang-tools.cmake)
38+
39+
# Build steps
40+
add_subdirectory(../source_third_party/libGPUCounters source_third_party/libGPUCounters)
41+
42+
add_subdirectory(../source_common/comms source_common/comms)
43+
add_subdirectory(../source_common/framework source_common/framework)
44+
add_subdirectory(../source_common/trackers source_common/trackers)
45+
46+
add_subdirectory(source)

layer_gpu_profile/README_LAYER.md

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# Layer: GPU Profile
2+
3+
This layer is a frame profiler that can capture per workload performance
4+
counters for selected frames running on an Arm GPU.
5+
6+
## What devices are supported?
7+
8+
This layer requires Vulkan 1.0 and an Arm GPU because it uses an Arm-specific
9+
counter sampling library.
10+
11+
## What data can be collected?
12+
13+
The layer serializes workloads for instrumented frames and injects counter
14+
samples between them, allowing the layer to measure the hardware cost of
15+
render passes, compute dispatches, transfers, etc.
16+
17+
The serialization is very invasive to wall-clock performance, due to removal
18+
of pipeline overlap between workloads and additional GPU idle time waiting for
19+
the layer to perform each performance counter sampling operation. This will
20+
have an impact on the counter data being captured!
21+
22+
Derived counters that show queue and functional unit utilization as a
23+
percentage of the overall "active" time of their parent block will report low
24+
because of time spent refilling and then draining the GPU pipeline between
25+
workloads. The overall _GPU Active Cycles_ counter is known to be unreliable,
26+
because the serialization means that command stream setup and teardown costs
27+
are not hidden in the shadow of surrounding work. We recommend using the
28+
individual queue active cycles counters as the main measure of performance.
29+
30+
Note that any counters that measure direct work, such as architectural issue
31+
cycles, or workload nouns, such as primitives or threads, should be unaffected
32+
by the loss of pipelining.
33+
34+
Arm GPUs provide a wide range of performance counters covering many different
35+
aspects of hardware performance. The layer will collect a standard set of
36+
counters by default but, with source modification, can collect any of the
37+
hardware counters and derived expressions supported by the
38+
[libGPUCounters][LGC] library that Arm provides on GitHub.
39+
40+
[LGC]: https://github.com/ARM-software/libGPUCounters
41+
42+
### GPU clock frequency impact
43+
44+
The GPU idle time waiting for the CPU to take a counter sample can cause the
45+
system DVFS power governor to decide that the GPU is not busy. In production
46+
devices we commonly see that the GPU will be down-clocked during the
47+
instrumented frame, which may have an impact on a subset of the available
48+
performance counters.
49+
50+
When running on a pre-production device we recommend pinning CPU, GPU, and bus
51+
clock speeds to avoid the performance instability.
52+
53+
## How do I use the layer?
54+
55+
### Prerequisites
56+
57+
Device setup steps:
58+
59+
* Ensure your Android device is in developer mode, with `adb` support enabled
60+
in developer settings.
61+
* Ensure the Android device is connected to your development workstation, and
62+
visible to `adb` with an authorized debug connection.
63+
64+
Application setup steps:
65+
66+
* Build a debuggable build of your application and install it on the Android
67+
device.
68+
69+
Tooling setup steps:
70+
71+
* Install the Android platform tools and ensure `adb` is on your `PATH`
72+
environment variable.
73+
* Install the Android NDK and set the `ANDROID_NDK_HOME` environment variable
74+
to its installation path.
75+
76+
### Layer build
77+
78+
Build the Profile layer for Android using the provided build script, or using
79+
equivalent manual commands, from the `layer_gpu_profile` directory. For full
80+
instructions see the _Build an Android layer_ and _Build a Linux layer_
81+
sections in the [Build documentation](../docs/building.md).
82+
83+
### Running using the layer
84+
85+
You can configure a device to run a profile by using the Android helper utility
86+
found in the root directory to configure the layer and manage the application.
87+
You must enable the profile layer, and provide a configuration file to
88+
parameterize it.
89+
90+
```sh
91+
python3 lgl_android_install.py --layer layer_gpu_profile --config <your.json> --profile <out_dir>
92+
```
93+
94+
The [`layer_config.json`](layer_config.json) file in this directory is a
95+
template configuration file you can start from. It defaults to periodic
96+
sampling every 600 frames, but you can modify this to suit your needs.
97+
98+
The `--profile` option specifies an output directory on the host to contain
99+
the CSV files written by the tool. One CSV is written for each frame, each CSV
100+
containing a table with one row per workload profiled in the frame, listed
101+
in API submit order.
102+
103+
The Android helper utility contains many other options for configuring the
104+
application under test and the capture process. For full instructions see the
105+
[Running on Android documentation](../docs/running_android.md).
106+
107+
## Layer configuration
108+
109+
The current layer supports two `sampling_mode` values:
110+
111+
* `periodic_frame`: Sample every N frames.
112+
* `frame_list`: Sample specific frames.
113+
114+
When `sampling_mode` is `periodic_frame` the integer value of the `periodic_frame` key
115+
defines the frame sampling period. The integer value of the
116+
`periodic_min_frame` key defines the first possible frame that could be
117+
profiled, allowing profiles to skip over any loading frames. By default frame 0
118+
is ignored.
119+
120+
When `sampling_mode` is `frame_list` the value of the `frame_list` key defines a list
121+
of integers giving the specific frames to capture.
122+
123+
## Layer counters
124+
125+
The current layer uses a hard-coded set of performance counters defined in the
126+
`Device` class constructor. If you wish to collect different counters you must
127+
edit the [Device source](./source/device.cpp) and rebuild the layer.
128+
129+
Any counters that are specified but that are not available on the current GPU
130+
will be ignored.
131+
132+
- - -
133+
134+
_Copyright © 2025, Arm Limited and contributors._

layer_gpu_profile/android_build.sh

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/env bash
2+
# SPDX-License-Identifier: MIT
3+
# ----------------------------------------------------------------------------
4+
# Copyright (c) 2024-2025 Arm Limited
5+
#
6+
# Permission is hereby granted, free of charge, to any person obtaining a copy
7+
# of this software and associated documentation files (the "Software"), to
8+
# deal in the Software without restriction, including without limitation the
9+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
# sell copies of the Software, and to permit persons to whom the Software is
11+
# furnished to do so, subject to the following conditions:
12+
#
13+
# The above copyright notice and this permission notice shall be included in
14+
# all copies or substantial portions of the Software.
15+
#
16+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22+
# IN THE SOFTWARE.
23+
# ----------------------------------------------------------------------------
24+
25+
# ----------------------------------------------------------------------------
26+
# Configuration
27+
28+
# Exit immediately if any component command errors
29+
set -e
30+
31+
BUILD_DIR_64=build_arm64
32+
BUILD_DIR_PACK=build_package
33+
34+
# ----------------------------------------------------------------------------
35+
# Process command line options
36+
if [ "$#" -lt 1 ]; then
37+
BUILD_TYPE=Release
38+
else
39+
BUILD_TYPE=$1
40+
fi
41+
42+
# Process the optional packaging flag (second argument, defaults to 0)
43+
if [ "$#" -lt 2 ]; then
44+
PACKAGE=0
45+
else
46+
PACKAGE=$2
47+
fi
48+
49+
if [ "${PACKAGE}" -gt "0" ]; then
50+
echo "Building a ${BUILD_TYPE} build with packaging"
51+
else
52+
echo "Building a ${BUILD_TYPE} build without packaging"
53+
fi
54+
55+
# ----------------------------------------------------------------------------
56+
# Build the 64-bit layer
57+
mkdir -p ${BUILD_DIR_64}
58+
pushd ${BUILD_DIR_64}
59+
60+
cmake \
61+
-DCMAKE_SYSTEM_NAME=Android \
62+
-DANDROID_PLATFORM=29 \
63+
-DANDROID_ABI=arm64-v8a \
64+
-DANDROID_TOOLCHAIN=clang \
65+
-DANDROID_STL=c++_static \
66+
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
67+
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \
68+
-DCMAKE_WARN_DEPRECATED=OFF \
69+
..
70+
71+
make -j16
72+
73+
popd
74+
75+
# ----------------------------------------------------------------------------
76+
# Build the release package
77+
if [ "${PACKAGE}" -gt "0" ]; then
78+
# Setup the package directories
79+
mkdir -p ${BUILD_DIR_PACK}/bin/android/arm64
80+
81+
# Install the 64-bit layer
82+
cp ${BUILD_DIR_64}/source/*.so ${BUILD_DIR_PACK}/bin/android/arm64
83+
fi

0 commit comments

Comments
 (0)