Skip to content

Commit dd8b8a8

Browse files
Rework HWCPipe (#11)
- Add a middle layer for specifying counters in a platform-independent way - Create interfaces for CPU/GPU profilers to enhance extensibility - Improve performance when sampling counters - Avoid unnecessary memory allocations - Support enabling counters via code or via JSON string
1 parent 49809e2 commit dd8b8a8

24 files changed

+22578
-1435
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,7 @@
3030
*.exe
3131
*.out
3232
*.app
33+
34+
# Ctags
35+
.tags
36+
.tags1

CMakeLists.txt

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,55 @@
1+
# Copyright (c) 2019, Arm Limited and Contributors
2+
#
3+
# SPDX-License-Identifier: MIT
4+
#
5+
# Permission is hereby granted, free of charge,
6+
# to any person obtaining a copy of this software and associated documentation files (the "Software"),
7+
# to deal in the Software without restriction, including without limitation the rights to
8+
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9+
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10+
#
11+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
14+
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19+
#
20+
121
cmake_minimum_required(VERSION 3.6)
222

323
project(hwcpipe LANGUAGES C CXX)
424

525
set(PROJECT_FILES
6-
instrument.h
7-
instruments_stats.h
8-
measurement.h
9-
instruments_stats.cpp)
10-
11-
if(ANDROID)
12-
list(APPEND PROJECT_FILES
13-
hwc.hpp
14-
hwc_names.hpp
15-
mali_counter.h
16-
mali_counter.cpp)
17-
endif()
26+
hwcpipe.h
27+
cpu_profiler.h
28+
gpu_profiler.h
29+
value.h
30+
31+
hwcpipe.cpp)
1832

1933
if(UNIX AND NOT APPLE)
2034
list(APPEND PROJECT_FILES
21-
pmu.h
22-
pmu_counter.h
23-
24-
pmu.cpp
25-
pmu_counter.cpp)
35+
vendor/arm/mali/hwc.hpp
36+
vendor/arm/mali/hwc_names.hpp
37+
vendor/arm/mali/mali_profiler.h
38+
vendor/arm/mali/mali_profiler.cpp)
39+
40+
list(APPEND PROJECT_FILES
41+
vendor/arm/pmu/pmu_counter.h
42+
vendor/arm/pmu/pmu_profiler.h
43+
vendor/arm/pmu/pmu_counter.cpp
44+
vendor/arm/pmu/pmu_profiler.cpp)
2645
endif()
27-
46+
2847
source_group("\\" FILES ${PROJECT_FILES})
2948

3049
add_library(${PROJECT_NAME} STATIC ${PROJECT_FILES})
3150

3251
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
3352

53+
target_include_directories(${PROJECT_NAME} PUBLIC third_party)
54+
3455
set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11)

README.md

Lines changed: 119 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,156 @@
1-
# HWCPipe
1+
<!--
2+
- Copyright (c) 2019, Arm Limited and Contributors
3+
-
4+
- SPDX-License-Identifier: MIT
5+
-
6+
- Permission is hereby granted, free of charge,
7+
- to any person obtaining a copy of this software and associated documentation files (the "Software"),
8+
- to deal in the Software without restriction, including without limitation the rights to
9+
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
10+
- and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
11+
-
12+
- The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
13+
-
14+
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
15+
- INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17+
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18+
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20+
-
21+
-->
222

23+
# HWCPipe
324

425
## Introduction
526

6-
HWCPipe is an interface to the Arm Hardware Counters, designed to allow for easily interfacing with and reading the hardware counters built into Arm hardware.
7-
27+
HWCPipe is a simple and extensible interface for reading CPU and GPU hardware counters.
828

929
## License
1030

11-
The software is provided under an MIT license. Contributions to this project are accepted under the same license.
31+
The software is provided under an MIT license.
32+
33+
This project has a third-party dependency, which may have independent licensing:
34+
35+
- [nlohmann/json](https://github.com/nlohmann/json): A JSON library for modern C++
1236

37+
## Contributions
38+
39+
All contributions are accepted under the same [LICENSE](LICENSE).
1340

1441
## Building
1542

16-
To use HWCPipe, build it as a shared library in your Android Project, to do this it must be integrated into your project with CMake.
43+
To use HWCPipe, build it as a static library in your project.
1744

45+
If your project uses CMake, you can add the following to your `CMakeLists.txt`:
1846

19-
## Using
47+
```
48+
add_subdirectory(hwcpipe)
49+
```
2050

21-
### Performance data
51+
## Usage
2252

23-
In order for performance data to be displayed, profiling needs to be enabled on the device.
24-
Some devices may disable it by default.
53+
### Using HWCPipe
2554

26-
Profiling can be enabled via adb:
55+
Basic usage for HWCPipe is simple:
2756

2857
```
29-
adb shell setprop security.perf_harden 0
58+
// HWCPipe performs automated platform detection for CPU/GPU counters
59+
hwcpipe::HWCPipe h;
60+
61+
// Start HWCPipe once at the beginning of the profiling session
62+
h.run();
63+
64+
while (main_loop) {
65+
// Call sample() to sample counters with the frequency you need
66+
auto measurements = h.sample();
67+
68+
[...]
69+
}
70+
71+
// At the end of the profiling session, stop HWCPipe
72+
h.stop();
3073
```
3174

32-
#### Enabling a Counter:
75+
The `sample` function returns a `Measurements` struct, which can be accessed like this:
76+
77+
```
78+
// Check if CPU measurements are available
79+
if (measurements.cpu)
80+
{
81+
// Look for a counter in the map
82+
const auto &counter = measurements.cpu->find(CpuCounter::Cycles);
83+
if (counter != measurements.cpu->end())
84+
{
85+
// Get the data stored in the counter, cast to the type you need
86+
auto value = counter->second.get<float>();
87+
}
88+
}
89+
```
3390

34-
To enable a counter, create either a PMU or Mali counter and then call its start function.
91+
### Enabling counters
92+
93+
The available counters are specified in the `CpuCounter` and `GpuCounter` enums (`cpu_profiler.h` and `gpu_profiler.h` respectively).
94+
95+
Platforms will support a subset of these counters, which can be queried via:
3596

3697
```
37-
Instrument instrument_ = PMUCounter();
38-
instrument_.start();
98+
auto cpu_counters = h.cpu_profiler()->supported_counters();
99+
auto gpu_counters = h.gpu_profiler()->supported_counters();
39100
```
40101

41-
#### Reading a Counter:
102+
You can specify the counters to be enabled in the following ways:
42103

43-
To read a counter, first stop it and then call its measurements function to store results in the MeasurementsMap variable which can then be read from.
104+
```
105+
// Enable a default set of counters
106+
auto h = hwcpipe::HWCPipe();
44107
108+
// Pass sets of CPU and GPU counters to be enabled
109+
auto h = hwcpipe::HWCPipe({CpuCounter::Cycles, CpuCounter::Instructions}, {GpuCounter::GpuCycles});
110+
111+
// Pass a JSON string
112+
auto h = hwcpipe::HWCPipe(json);
45113
```
46-
instrument_.stop();
47-
MeasurementsMap measurements = instrument_.measurements();
114+
115+
The JSON string should be formatted like this:
116+
117+
```
118+
{
119+
"cpu": ["Cycles", "Instructions"],
120+
"gpu": ["GpuCycles"]
121+
}
48122
```
49123

124+
Available counter names can be found in `cpu_counter_names` (`cpu_profiler.h`) and `gpu_counter_names` (`gpu_profiler.h`).
125+
126+
For more information regarding Mali counters, see [Mali Performance Counters](https://community.arm.com/graphics/b/blog/posts/mali-bifrost-family-performance-counters).
127+
128+
### Enabling profiling on Android
129+
130+
In order for performance data to be displayed, profiling needs to be enabled on the device.
131+
Some devices may disable it by default.
132+
133+
Profiling can be enabled via `adb`:
134+
135+
```
136+
adb shell setprop security.perf_harden 0
137+
```
50138

51-
## Counters
139+
## Adding support for a new platform
52140

53-
The counters are separated into two categories: PMU and Mali counters, the available counters are:
141+
If the counters provided in `CpuCounter` and `GpuCounter` are enough for the new platform,
142+
the process is simple:
54143

55-
#### PMU
144+
* Add an implementation of either `CpuProfiler` or `GpuProfiler` (you can use `PmuProfiler` and `MaliProfiler` as references).
145+
* Add your platform to the automated platform detection in `hwcpipe.cpp`. For consistency in platform detection, the constructor for your platform should throw if the platform is not available.
146+
* Add your platform to the build system.
56147

57-
- CPU cycles
58-
- CPU instructions
59-
- Cache miss ratio
60-
- Branch miss ratio
148+
### Adding new counters
61149

62-
#### Mali
150+
If you need to add new counters to the existing ones, you should update the following variables:
63151

64-
- Timespan
65-
- GPU cycles
66-
- Fragment Jobs
67-
- Vertex/compute jobs
68-
- L2 cache read lookups
69-
- L2 cache external reads
70-
- L2 cache external read stalls
71-
- L2 cache external read beats
72-
- L2 cache write lookups
73-
- L2 cache external writes
74-
- L2 cache external write stalls
75-
- L2 cache external write beats
152+
* Add the counter to the `CpuCounter`/`GpuCounter` enum.
153+
* Add the counter name to the `cpu_counter_names`/`gpu_counter_names` map (necessary for JSON initialization).
154+
* Add a description and the unit for your counter to the `cpu_counter_info`/`gpu_counter_info` map.
76155

77-
For more information regarding these counters, see [Mali Performance Counters](https://community.arm.com/graphics/b/blog/posts/mali-bifrost-family-performance-counters).
156+
The `CpuCounter` and `GpuCounter` enums are meant to be expanded. Platforms must not break if new counters are added.

cpu_profiler.h

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright (c) 2019 ARM Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
#pragma once
26+
27+
#include "value.h"
28+
29+
#include <string>
30+
#include <unordered_map>
31+
#include <unordered_set>
32+
33+
namespace hwcpipe
34+
{
35+
/**
 * The available CPU counters.
 *
 * Profiler implementations will support a subset of them. The enum is meant
 * to be extended, so code consuming it must tolerate values it does not know.
 */
enum class CpuCounter
{
    Cycles,                    // Number of CPU cycles
    Instructions,              // Number of CPU instructions
    CacheReferences,           // Number of cache references
    CacheMisses,               // Number of cache misses
    BranchInstructions,        // Number of branch instructions
    BranchMisses,              // Number of branch misses

    // Not a real counter: it has no entry in cpu_counter_names or
    // cpu_counter_info. NOTE(review): presumably a sentinel equal to the
    // number of counters above - keep it last when adding new counters.
    MaxValue
};
47+
48+
// Mapping from CPU counter names to enum values. Used for JSON initialization.
49+
const std::unordered_map<std::string, CpuCounter> cpu_counter_names{
50+
{"Cycles", CpuCounter::Cycles},
51+
{"Instructions", CpuCounter::Instructions},
52+
{"CacheReferences", CpuCounter::CacheReferences},
53+
{"CacheMisses", CpuCounter::CacheMisses},
54+
{"BranchInstructions", CpuCounter::BranchInstructions},
55+
{"BranchMisses", CpuCounter::BranchMisses},
56+
};
57+
58+
// A hash function for CpuCounter values, used as the hasher of the unordered
// containers keyed by CpuCounter.
//
// A custom hasher is needed because std::hash is only guaranteed to work for
// enumeration types from C++14 onwards.
struct CpuCounterHash
{
    // Returns the value converted to std::size_t. For an enum this is the
    // enumerator's underlying value, so distinct counters never collide.
    template <typename T>
    std::size_t operator()(T t) const
    {
        return static_cast<std::size_t>(t);
    }
};
67+
68+
// Human-readable information attached to a CPU counter.
struct CpuCounterInfo
{
    std::string desc;        // Short description of what the counter measures
    std::string unit;        // Unit in which the counter value is expressed
};
73+
74+
// Mapping from each counter to its corresponding information (description and unit)
75+
const std::unordered_map<CpuCounter, CpuCounterInfo, CpuCounterHash> cpu_counter_info{
76+
{CpuCounter::Cycles, {"Number of CPU cycles", "cycles"}},
77+
{CpuCounter::Instructions, {"Number of CPU instructions", "instructions"}},
78+
{CpuCounter::CacheReferences, {"Number of cache references", "references"}},
79+
{CpuCounter::CacheMisses, {"Number of cache misses", "misses"}},
80+
{CpuCounter::BranchInstructions, {"Number of branch instructions", "instructions"}},
81+
{CpuCounter::BranchMisses, {"Number of branch misses", "misses"}},
82+
};
83+
84+
typedef std::unordered_set<CpuCounter, CpuCounterHash> CpuCounterSet;
85+
typedef std::unordered_map<CpuCounter, Value, CpuCounterHash>
86+
CpuMeasurements;
87+
88+
/** An interface for classes that collect CPU performance data. */
89+
class CpuProfiler
90+
{
91+
public:
92+
virtual ~CpuProfiler() = default;
93+
94+
// Returns the enabled counters
95+
virtual const CpuCounterSet &enabled_counters() const = 0;
96+
97+
// Returns the counters that the platform supports
98+
virtual const CpuCounterSet &supported_counters() const = 0;
99+
100+
// Sets the enabled counters after initialization
101+
virtual void set_enabled_counters(CpuCounterSet counters) = 0;
102+
103+
// Starts a profiling session
104+
virtual void run() = 0;
105+
106+
// Sample the counters. Returns a map of measurements for the counters
107+
// that are both available and enabled.
108+
// A profiling session must be running when sampling the counters.
109+
virtual const CpuMeasurements &sample() = 0;
110+
111+
// Stops the active profiling session
112+
virtual void stop() = 0;
113+
};
114+
115+
} // namespace hwcpipe

0 commit comments

Comments
 (0)