Skip to content

Commit 4846843

Browse files
committed
Profile layer: Add frame serialization config option
1 parent f123779 commit 4846843

File tree

5 files changed

+49
-6
lines changed

5 files changed

+49
-6
lines changed

layer_gpu_profile/README_LAYER.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,14 @@ to profile using the `sample_mode` config option:
140140
* `workload`: Sample every workload in each frame of interest.
141141
* `frame`: Sample at the end of each frame of interest.
142142

143+
By default, per-frame samples are isolated from other frames by inserting a
144+
`vkDeviceWaitIdle()` before and after the frame to ensure that the workload
145+
in the sampled region does not overlap neighboring frames. Setting the
146+
`frame_serialization` config option to `false` will allow frames to overlap
147+
without serialization, but can add noise to the returned counter values. This
148+
option has no effect for per-workload sampling, which must always use
149+
serialization.
150+
143151
## Layer counters
144152

145153
The current layer uses a hard-coded set of performance counters defined in the

layer_gpu_profile/layer_config.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@
44
"sample_mode": "frame",
55
"periodic_min_frame": 1,
66
"periodic_frame": 600,
7-
"frame_list": []
7+
"frame_list": [],
8+
"frame_serialization": false
89
}

layer_gpu_profile/source/layer_config.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ void LayerConfig::parseSamplingOptions(const json& config)
9090
rawSampleMode = "disabled";
9191
}
9292

93+
// Decode frame serialization mode
94+
frameSerialization = config.at("frame_serialization");
95+
9396
LAYER_LOG("Layer sampling configuration");
9497
LAYER_LOG("============================");
9598
LAYER_LOG(" - Frame selection mode: %s", rawFrameMode.c_str());
@@ -107,6 +110,11 @@ void LayerConfig::parseSamplingOptions(const json& config)
107110
}
108111

109112
LAYER_LOG(" - Counter sampling mode: %s", rawSampleMode.c_str());
113+
114+
if (samplingMode == COUNTER_SAMPLING_FRAMES)
115+
{
116+
LAYER_LOG(" - Frame serialization: %u", frameSerialization);
117+
}
110118
}
111119

112120
/* See header for documentation. */
@@ -172,20 +180,29 @@ bool LayerConfig::isFrameOfInterest(
172180
}
173181

174182
/* See header for documentation. */
175-
bool LayerConfig::isSamplingWorkloads() const {
183+
bool LayerConfig::isSamplingWorkloads() const
184+
{
176185
return frameMode != FRAME_SELECTION_DISABLED &&
177186
samplingMode == COUNTER_SAMPLING_WORKLOADS;
178187
}
179188

180189
/* See header for documentation. */
181-
bool LayerConfig::isSamplingFrames() const {
190+
bool LayerConfig::isSamplingFrames() const
191+
{
182192
return frameMode != FRAME_SELECTION_DISABLED &&
183193
samplingMode == COUNTER_SAMPLING_FRAMES;
184194
}
185195

186196
/* See header for documentation. */
187-
bool LayerConfig::isSamplingAny() const {
197+
bool LayerConfig::isSamplingAny() const
198+
{
188199
return frameMode != FRAME_SELECTION_DISABLED &&
189200
samplingMode != COUNTER_SAMPLING_DISABLED;
190201
}
191202

203+
/* See header for documentation. */
204+
bool LayerConfig::isSerializingFrames() const
205+
{
206+
return isSamplingWorkloads() ||
207+
(isSamplingFrames() && frameSerialization);
208+
};

layer_gpu_profile/source/layer_config.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@ class LayerConfig
7979
*/
8080
bool isSamplingAny() const;
8181

82+
/**
83+
* @brief Test if we are serializing frames.
84+
*
85+
* @return @c true if serializing, @c false otherwise.
86+
*/
87+
bool isSerializingFrames() const;
88+
8289
private:
8390
/**
8491
* @brief Supported frame selection modes.
@@ -119,6 +126,10 @@ class LayerConfig
119126
*/
120127
CounterSamplingMode samplingMode {COUNTER_SAMPLING_DISABLED};
121128

129+
/**
130+
* @brief The frame sample serialization mode.
131+
*/
132+
bool frameSerialization {true};
122133

123134
/**
124135
* @brief The sampling period in frames, or 0 if disabled.

layer_gpu_profile/source/layer_device_functions_queue.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,16 @@ static void processFrameBoundaryPostSubmit(
140140
Device& layer,
141141
bool frameSample
142142
) {
143+
const auto& config = layer.instance->config;
144+
143145
// If we are measuring performance ensure the previous frame has finished
144146
// and then take an initial sample to reset the counters
145-
layer.driver.vkDeviceWaitIdle(layer.device);
146-
workaroundDelay();
147+
if (config.isSerializingFrames())
148+
{
149+
layer.driver.vkDeviceWaitIdle(layer.device);
150+
workaroundDelay();
151+
}
152+
147153
auto ec = layer.lgcSampler->sample_now();
148154
if (ec)
149155
{

0 commit comments

Comments
 (0)