Skip to content

Commit c25731b

Browse files
issue-4576: collect execution time stats separately for different request sizes (#4577)
#4576
1 parent b191f35 commit c25731b

File tree

22 files changed

+589
-128
lines changed

22 files changed

+589
-128
lines changed

cloud/blockstore/config/diagnostics.proto

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,4 +231,15 @@ message TDiagnosticsConfig
231231
optional bool SkipReportingZeroBlocksMetricsForYDBBasedDisks = 53;
232232

233233
optional NCloud.NProto.TOpentelemetryTraceConfig OpentelemetryTraceConfig = 54;
234+
235+
// Represents [Start, End) interval.
236+
message TInterval
237+
{
238+
required uint64 Start = 1;
239+
required uint64 End = 2;
240+
};
241+
242+
// Request size intervals that should be measured separately (in bytes).
243+
// Intersections between ExecutionTimeSizeClasses are not allowed.
244+
repeated TInterval ExecutionTimeSizeClasses = 56;
234245
}

cloud/blockstore/libs/daemon/common/bootstrap.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,8 @@ void TBootstrapBase::Init()
300300
RequestStats = CreateServerRequestStats(
301301
serverGroup,
302302
Timer,
303-
Configs->DiagnosticsConfig->GetHistogramCounterOptions());
303+
Configs->DiagnosticsConfig->GetHistogramCounterOptions(),
304+
Configs->DiagnosticsConfig->GetExecutionTimeSizeClasses());
304305

305306
if (!VolumeStats) {
306307
VolumeStats = CreateVolumeStats(

cloud/blockstore/libs/diagnostics/config.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ namespace {
6060
\
6161
xxx(SkipReportingZeroBlocksMetricsForYDBBasedDisks, bool, false )\
6262
xxx(OpentelemetryTraceConfig, ::NCloud::NProto::TOpentelemetryTraceConfig, {} )\
63+
\
64+
xxx(ExecutionTimeSizeClasses, TVector<TSizeInterval>, {} )\
6365
// BLOCKSTORE_DIAGNOSTICS_CONFIG
6466

6567
#define BLOCKSTORE_DIAGNOSTICS_DECLARE_CONFIG(name, type, value) \
@@ -119,6 +121,18 @@ TVector<TString> ConvertValue(
119121
return v;
120122
}
121123

124+
template <>
125+
TVector<TSizeInterval> ConvertValue(
126+
const google::protobuf::RepeatedPtrField<
127+
NProto::TDiagnosticsConfig::TInterval>& value)
128+
{
129+
TVector<TSizeInterval> v;
130+
for (const auto& x : value) {
131+
v.push_back({x.GetStart(), x.GetEnd()});
132+
}
133+
return v;
134+
}
135+
122136
template <typename T>
123137
void DumpImpl(const T& t, IOutputStream& os)
124138
{
@@ -136,6 +150,17 @@ void DumpImpl(const TVector<TString>& value, IOutputStream& os)
136150
}
137151
}
138152

153+
template <>
154+
void DumpImpl(const TVector<TSizeInterval>& value, IOutputStream& os)
155+
{
156+
for (size_t i = 0; i < value.size(); ++i) {
157+
if (i) {
158+
os << ",";
159+
}
160+
os << ToString(value[i]);
161+
}
162+
}
163+
139164
} // namespace
140165

141166
////////////////////////////////////////////////////////////////////////////////

cloud/blockstore/libs/diagnostics/config.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
#include <cloud/blockstore/config/diagnostics.pb.h>
66

7-
#include "cloud/storage/core/libs/diagnostics/histogram_counter_options.h"
7+
#include <cloud/storage/core/libs/common/size_interval.h>
8+
#include <cloud/storage/core/libs/diagnostics/histogram_counter_options.h>
89
#include <cloud/storage/core/libs/diagnostics/trace_reader.h>
910

1011
#include <util/generic/string.h>
@@ -166,6 +167,8 @@ class TDiagnosticsConfig
166167
[[nodiscard]] NCloud::NProto::TOpentelemetryTraceConfig
167168
GetOpentelemetryTraceConfig() const;
168169

170+
[[nodiscard]] TVector<TSizeInterval> GetExecutionTimeSizeClasses() const;
171+
169172
void Dump(IOutputStream& out) const;
170173
void DumpHtml(IOutputStream& out) const;
171174
};

cloud/blockstore/libs/diagnostics/request_stats.cpp

Lines changed: 105 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
#include <cloud/blockstore/libs/service/request_helpers.h>
66

7+
#include <cloud/storage/core/libs/common/disjoint_interval_map.h>
8+
#include <cloud/storage/core/libs/common/format.h>
79
#include <cloud/storage/core/libs/diagnostics/histogram.h>
810
#include <cloud/storage/core/libs/diagnostics/max_calculator.h>
911
#include <cloud/storage/core/libs/diagnostics/request_counters.h>
@@ -26,19 +28,38 @@ class THdrRequestPercentiles
2628
{
2729
using TDynamicCounterPtr = TDynamicCounters::TCounterPtr;
2830

31+
struct TSizeClassCounters
32+
{
33+
// TLatencyHistogram is not movable, that's why we should wrap it in
34+
// unique_ptr.
35+
std::unique_ptr<TLatencyHistogram> ExecutionTimeHist;
36+
TVector<TDynamicCounterPtr> CountersExecutionTime;
37+
};
38+
2939
private:
3040
TVector<TDynamicCounterPtr> CountersExecutionTime;
3141
TVector<TDynamicCounterPtr> CountersTotal;
3242
TVector<TDynamicCounterPtr> CountersSize;
3343

44+
TDisjointIntervalMap<ui64, TSizeClassCounters> ExecutionTimeSizeClasses;
45+
3446
TLatencyHistogram ExecutionTimeHist;
3547
TLatencyHistogram TotalHist;
3648
TSizeHistogram SizeHist;
3749

3850
public:
39-
void Register(
40-
TDynamicCounters& counters,
41-
const TString& request)
51+
explicit THdrRequestPercentiles(
52+
const TVector<TSizeInterval>& executionTimeSizeClasses)
53+
{
54+
for (const auto& [start, end]: executionTimeSizeClasses) {
55+
ExecutionTimeSizeClasses.Add(
56+
start,
57+
end,
58+
{.ExecutionTimeHist = std::make_unique<TLatencyHistogram>()});
59+
}
60+
}
61+
62+
void Register(TDynamicCounters& counters, const TString& request)
4263
{
4364
auto requestGroup = counters.GetSubgroup("request", request);
4465

@@ -50,20 +71,41 @@ class THdrRequestPercentiles
5071

5172
auto sizeGroup = requestGroup->GetSubgroup("percentiles", "Size");
5273
Register(*sizeGroup, CountersSize);
74+
75+
for (auto& [_, item]: ExecutionTimeSizeClasses) {
76+
const auto sizeClassName =
77+
ToString(TSizeInterval{item.Begin, item.End});
78+
79+
auto sizeClassCounters =
80+
executionTimeGroup->GetSubgroup("sizeclass", sizeClassName);
81+
82+
Register(*sizeClassCounters, item.Value.CountersExecutionTime);
83+
}
5384
}
5485

5586
void UpdateStats()
5687
{
5788
Update(CountersTotal, TotalHist);
5889
Update(CountersSize, SizeHist);
5990
Update(CountersExecutionTime, ExecutionTimeHist);
91+
for (auto& [_, item]: ExecutionTimeSizeClasses) {
92+
Update(item.Value.CountersExecutionTime, *item.Value.ExecutionTimeHist);
93+
}
6094
}
6195

62-
void AddStats(TDuration requestExecutionTime, TDuration requestTime, ui32 requestBytes)
96+
// Records a single completed request into the percentile histograms.
//
// requestExecutionTime - recorded into the overall execution-time histogram
//                        and into the execution-time histogram of the size
//                        class (if any) that contains requestBytes.
// requestTime          - recorded into the total-time histogram.
// requestBytes         - recorded into the size histogram; also selects the
//                        size class.
void AddStats(
    TDuration requestExecutionTime,
    TDuration requestTime,
    ui32 requestBytes)
{
    ExecutionTimeHist.RecordValue(requestExecutionTime);
    TotalHist.RecordValue(requestTime);
    SizeHist.RecordValue(requestBytes);

    // Look up the size class with the one-element interval
    // [requestBytes, requestBytes + 1). Widen to ui64 before adding 1 so that
    // the exclusive upper bound cannot wrap around to 0 for the maximum ui32.
    const ui64 sizeBegin = requestBytes;
    const ui64 sizeEnd = sizeBegin + 1;

    ExecutionTimeSizeClasses.VisitOverlapping(
        sizeBegin,
        sizeEnd,
        [&](TDisjointIntervalMap<ui64, TSizeClassCounters>::TIterator it)
        {
            // Per-size-class histograms measure execution time, so they must
            // receive the same value as ExecutionTimeHist, not the total time.
            it->second.Value.ExecutionTimeHist->RecordValue(
                requestExecutionTime);
        });
}
68110

69111
void BatchCompleted(
@@ -114,6 +156,13 @@ class THdrPercentiles
114156
THdrRequestPercentiles ZeroBlocksPercentiles;
115157

116158
public:
159+
explicit THdrPercentiles(
160+
const TVector<TSizeInterval>& executionTimeSizeClasses)
161+
: ReadBlocksPercentiles(executionTimeSizeClasses)
162+
, WriteBlocksPercentiles(executionTimeSizeClasses)
163+
, ZeroBlocksPercentiles(executionTimeSizeClasses)
164+
{}
165+
117166
void Register(TDynamicCounters& counters)
118167
{
119168
ReadBlocksPercentiles.Register(
@@ -176,6 +225,31 @@ class THdrPercentiles
176225

177226
////////////////////////////////////////////////////////////////////////////////
178227

228+
constexpr TRequestCounters::EOptions DefaultOptions =
229+
TRequestCounters::EOption::ReportDataPlaneHistogram |
230+
TRequestCounters::EOption::AddSpecialCounters |
231+
TRequestCounters::EOption::OnlyReadWriteRequests;
232+
233+
constexpr TRequestCounters::EOptions GeneralOptions =
234+
TRequestCounters::EOption::ReportDataPlaneHistogram |
235+
TRequestCounters::EOption::AddSpecialCounters;
236+
237+
constexpr TRequestCounters::EOptions SSDOrHDDOptions =
238+
TRequestCounters::EOption::ReportDataPlaneHistogram |
239+
TRequestCounters::EOption::OnlyReadWriteRequests;
240+
241+
// X-macro that enumerates the media kinds having their own Total<name> /
// HdrTotal<name> members. For every kind it invokes
//     xxx(name, options, <extra arguments forwarded from the caller>)
// where `name` is the member-name suffix (empty for the aggregate entry) and
// `options` is the TRequestCounters option set used for that kind.
// Every row separates `options` from the forwarded arguments with a comma, so
// forwarded arguments are never glued onto the options token.
#define BLOCKSTORE_MEDIA_KIND(xxx, ...)                                        \
    xxx(,           GeneralOptions,  __VA_ARGS__                              )\
    xxx(SSD,        SSDOrHDDOptions, __VA_ARGS__                              )\
    xxx(HDD,        SSDOrHDDOptions, __VA_ARGS__                              )\
    xxx(SSDNonrepl, DefaultOptions,  __VA_ARGS__                              )\
    xxx(SSDMirror2, DefaultOptions,  __VA_ARGS__                              )\
    xxx(SSDMirror3, DefaultOptions,  __VA_ARGS__                              )\
    xxx(SSDLocal,   DefaultOptions,  __VA_ARGS__                              )\
    xxx(HDDLocal,   DefaultOptions,  __VA_ARGS__                              )\
    xxx(HDDNonrepl, DefaultOptions,  __VA_ARGS__                              )\
// BLOCKSTORE_MEDIA_KIND
252+
179253
class TRequestStats final
180254
: public IRequestStats
181255
, public std::enable_shared_from_this<TRequestStats>
@@ -205,64 +279,30 @@ class TRequestStats final
205279
THdrPercentiles HdrTotalHDDNonrepl;
206280

207281
public:
282+
283+
#define INITIALIZE_REQUEST_COUNTERS(name, options, ...) \
284+
, Total##name(MakeRequestCounters( \
285+
timer, \
286+
options, \
287+
histogramCounterOptions, \
288+
executionTimeSizeClasses)) \
289+
// INITIALIZE_REQUEST_COUNTERS
290+
291+
#define INITIALIZE_HDR_PERCENTILES(name, ...) \
292+
, HdrTotal##name( \
293+
executionTimeSizeClasses) \
294+
// INITIALIZE_HDR_PERCENTILES
295+
208296
TRequestStats(
209297
TDynamicCountersPtr counters,
210298
bool isServerSide,
211299
ITimerPtr timer,
212-
EHistogramCounterOptions histogramCounterOptions)
300+
EHistogramCounterOptions histogramCounterOptions,
301+
const TVector<TSizeInterval>& executionTimeSizeClasses)
213302
: Counters(std::move(counters))
214303
, IsServerSide(isServerSide)
215-
, Total(MakeRequestCounters(
216-
timer,
217-
TRequestCounters::EOption::ReportDataPlaneHistogram |
218-
TRequestCounters::EOption::AddSpecialCounters,
219-
histogramCounterOptions))
220-
, TotalSSD(MakeRequestCounters(
221-
timer,
222-
TRequestCounters::EOption::ReportDataPlaneHistogram |
223-
TRequestCounters::EOption::OnlyReadWriteRequests,
224-
histogramCounterOptions))
225-
, TotalHDD(MakeRequestCounters(
226-
timer,
227-
TRequestCounters::EOption::ReportDataPlaneHistogram |
228-
TRequestCounters::EOption::OnlyReadWriteRequests,
229-
histogramCounterOptions))
230-
, TotalSSDNonrepl(MakeRequestCounters(
231-
timer,
232-
TRequestCounters::EOption::ReportDataPlaneHistogram |
233-
TRequestCounters::EOption::AddSpecialCounters |
234-
TRequestCounters::EOption::OnlyReadWriteRequests,
235-
histogramCounterOptions))
236-
, TotalSSDMirror2(MakeRequestCounters(
237-
timer,
238-
TRequestCounters::EOption::ReportDataPlaneHistogram |
239-
TRequestCounters::EOption::AddSpecialCounters |
240-
TRequestCounters::EOption::OnlyReadWriteRequests,
241-
histogramCounterOptions))
242-
, TotalSSDMirror3(MakeRequestCounters(
243-
timer,
244-
TRequestCounters::EOption::ReportDataPlaneHistogram |
245-
TRequestCounters::EOption::AddSpecialCounters |
246-
TRequestCounters::EOption::OnlyReadWriteRequests,
247-
histogramCounterOptions))
248-
, TotalSSDLocal(MakeRequestCounters(
249-
timer,
250-
TRequestCounters::EOption::ReportDataPlaneHistogram |
251-
TRequestCounters::EOption::AddSpecialCounters |
252-
TRequestCounters::EOption::OnlyReadWriteRequests,
253-
histogramCounterOptions))
254-
, TotalHDDLocal(MakeRequestCounters(
255-
timer,
256-
TRequestCounters::EOption::ReportDataPlaneHistogram |
257-
TRequestCounters::EOption::AddSpecialCounters |
258-
TRequestCounters::EOption::OnlyReadWriteRequests,
259-
histogramCounterOptions))
260-
, TotalHDDNonrepl(MakeRequestCounters(
261-
timer,
262-
TRequestCounters::EOption::ReportDataPlaneHistogram |
263-
TRequestCounters::EOption::AddSpecialCounters |
264-
TRequestCounters::EOption::OnlyReadWriteRequests,
265-
histogramCounterOptions))
304+
BLOCKSTORE_MEDIA_KIND(INITIALIZE_REQUEST_COUNTERS)
305+
BLOCKSTORE_MEDIA_KIND(INITIALIZE_HDR_PERCENTILES)
266306
{
267307
Total.Register(*Counters);
268308

@@ -303,6 +343,9 @@ class TRequestStats final
303343
}
304344
}
305345

346+
#undef INITIALIZE_REQUEST_COUNTERS
347+
#undef INITIALIZE_HDR_PERCENTILES
348+
306349
ui64 RequestStarted(
307350
NCloud::NProto::EStorageMediaKind mediaKind,
308351
EBlockStoreRequest requestType,
@@ -708,19 +751,22 @@ IRequestStatsPtr CreateClientRequestStats(
708751
std::move(counters),
709752
false,
710753
std::move(timer),
711-
histogramCounterOptions);
754+
histogramCounterOptions,
755+
TVector<TSizeInterval>{});
712756
}
713757

714758
IRequestStatsPtr CreateServerRequestStats(
715759
TDynamicCountersPtr counters,
716760
ITimerPtr timer,
717-
EHistogramCounterOptions histogramCounterOptions)
761+
EHistogramCounterOptions histogramCounterOptions,
762+
const TVector<TSizeInterval>& executionTimeSizeClasses)
718763
{
719764
return std::make_shared<TRequestStats>(
720765
std::move(counters),
721766
true,
722767
std::move(timer),
723-
histogramCounterOptions);
768+
histogramCounterOptions,
769+
executionTimeSizeClasses);
724770
}
725771

726772
IRequestStatsPtr CreateRequestStatsStub()

cloud/blockstore/libs/diagnostics/request_stats.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ IRequestStatsPtr CreateClientRequestStats(
8080
IRequestStatsPtr CreateServerRequestStats(
8181
NMonitoring::TDynamicCountersPtr counters,
8282
ITimerPtr timer,
83-
EHistogramCounterOptions histogramCounterOptions);
83+
EHistogramCounterOptions histogramCounterOptions,
84+
const TVector<TSizeInterval>& executionTimeSizeClasses);
8485
IRequestStatsPtr CreateRequestStatsStub();
8586

8687
} // namespace NCloud::NBlockStore

0 commit comments

Comments
 (0)