Skip to content

Commit 6fae490

Browse files
committed
Add CompoundTimeHistogram that allows collecting both ms and us metrics
1 parent d3137c8 commit 6fae490

File tree

11 files changed

+193
-23
lines changed

11 files changed

+193
-23
lines changed

cloud/blockstore/config/diagnostics.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,5 +230,9 @@ message TDiagnosticsConfig
230230
// Skip reporting ZeroBlocks metrics as part of WriteBlocks metrics
231231
optional bool SkipReportingZeroBlocksMetricsForYDBBasedDisks = 53;
232232

233+
// Opentelemetry trace config
233234
optional NCloud.NProto.TOpentelemetryTraceConfig OpentelemetryTraceConfig = 54;
235+
236+
// Time histogram units for server metrics
237+
optional NCloud.NProto.ETimeHistogramUnits ServerTimeHistogramUnits = 56;
234238
}

cloud/blockstore/libs/diagnostics/config.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ namespace {
5656
xxx(LocalHDDDowntimeThreshold, TDuration, TDuration::Seconds(15) )\
5757
xxx(ReportHistogramAsMultipleCounters, bool, true )\
5858
xxx(ReportHistogramAsSingleCounter, bool, false )\
59-
xxx(StatsFetcherType, NCloud::NProto::EStatsFetcherType, NCloud::NProto::EStatsFetcherType::CGROUP )\
59+
xxx(ServerTimeHistogramUnits, NCloud::NProto::ETimeHistogramUnits, NCloud::NProto::ETimeHistogramUnits::THU_US )\
60+
xxx(StatsFetcherType, NCloud::NProto::EStatsFetcherType, NCloud::NProto::EStatsFetcherType::CGROUP )\
6061
\
6162
xxx(SkipReportingZeroBlocksMetricsForYDBBasedDisks, bool, false )\
6263
xxx(OpentelemetryTraceConfig, ::NCloud::NProto::TOpentelemetryTraceConfig, {} )\
@@ -173,6 +174,22 @@ EHistogramCounterOptions TDiagnosticsConfig::GetHistogramCounterOptions() const
173174
if (GetReportHistogramAsSingleCounter()) {
174175
histogramCounterOptions |= EHistogramCounterOption::ReportSingleCounter;
175176
}
177+
switch (GetServerTimeHistogramUnits()) {
178+
case NCloud::NProto::ETimeHistogramUnits::THU_US:
179+
histogramCounterOptions |=
180+
EHistogramCounterOption::UseUsUnitsForTimeHistogram;
181+
break;
182+
case NCloud::NProto::ETimeHistogramUnits::THU_MS:
183+
histogramCounterOptions |=
184+
EHistogramCounterOption::UseMsUnitsForTimeHistogram;
185+
break;
186+
case NCloud::NProto::ETimeHistogramUnits::THU_MS_AND_US:
187+
histogramCounterOptions |=
188+
EHistogramCounterOption::UseMsUnitsForTimeHistogram;
189+
histogramCounterOptions |=
190+
EHistogramCounterOption::UseUsUnitsForTimeHistogram;
191+
break;
192+
}
176193
return histogramCounterOptions;
177194
}
178195

@@ -300,3 +317,11 @@ void Out<NCloud::NProto::EStatsFetcherType>(
300317
out << NCloud::NProto::EStatsFetcherType_Name(
301318
statsFetcherType);
302319
}
320+
321+
template <>
322+
void Out<NCloud::NProto::ETimeHistogramUnits>(
323+
IOutputStream& out,
324+
NCloud::NProto::ETimeHistogramUnits timeHistogramUnits)
325+
{
326+
out << NCloud::NProto::ETimeHistogramUnits_Name(timeHistogramUnits);
327+
}

cloud/blockstore/libs/diagnostics/config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class TDiagnosticsConfig
158158

159159
TRequestThresholds GetRequestThresholds() const;
160160
EHistogramCounterOptions GetHistogramCounterOptions() const;
161+
NCloud::NProto::ETimeHistogramUnits GetServerTimeHistogramUnits() const;
161162

162163
NCloud::NProto::EStatsFetcherType GetStatsFetcherType() const;
163164

cloud/filestore/config/diagnostics.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,6 @@ message TDiagnosticsConfig
148148
// Limit number of records flushed in each frame during flush iteration
149149
optional uint64 ProfileLogMaxFrameFlushRecords = 30;
150150

151+
// Time histogram units for server metrics
152+
optional NCloud.NProto.ETimeHistogramUnits ServerTimeHistogramUnits = 31;
151153
}

cloud/filestore/libs/diagnostics/config.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ namespace {
4242
xxx(HDDFileSystemPerformanceProfile, TFileSystemPerformanceProfile, {} )\
4343
xxx(SSDFileSystemPerformanceProfile, TFileSystemPerformanceProfile, {} )\
4444
\
45-
xxx(StatsFetcherType, NCloud::NProto::EStatsFetcherType, NCloud::NProto::EStatsFetcherType::CGROUP )\
45+
xxx(ServerTimeHistogramUnits, NCloud::NProto::ETimeHistogramUnits, NCloud::NProto::ETimeHistogramUnits::THU_US )\
46+
xxx(StatsFetcherType, NCloud::NProto::EStatsFetcherType, NCloud::NProto::EStatsFetcherType::CGROUP )\
4647
\
4748
xxx(ProfileLogMaxFlushRecords, ui64, 0 )\
4849
xxx(ProfileLogMaxFrameFlushRecords, ui64, 0 )\
@@ -150,6 +151,22 @@ EHistogramCounterOptions TDiagnosticsConfig::GetHistogramCounterOptions() const
150151
if (GetReportHistogramAsSingleCounter()) {
151152
histogramCounterOptions |= EHistogramCounterOption::ReportSingleCounter;
152153
}
154+
switch (GetServerTimeHistogramUnits()) {
155+
case NCloud::NProto::ETimeHistogramUnits::THU_US:
156+
histogramCounterOptions |=
157+
EHistogramCounterOption::UseUsUnitsForTimeHistogram;
158+
break;
159+
case NCloud::NProto::ETimeHistogramUnits::THU_MS:
160+
histogramCounterOptions |=
161+
EHistogramCounterOption::UseMsUnitsForTimeHistogram;
162+
break;
163+
case NCloud::NProto::ETimeHistogramUnits::THU_MS_AND_US:
164+
histogramCounterOptions |=
165+
EHistogramCounterOption::UseMsUnitsForTimeHistogram;
166+
histogramCounterOptions |=
167+
EHistogramCounterOption::UseUsUnitsForTimeHistogram;
168+
break;
169+
}
153170
return histogramCounterOptions;
154171
}
155172

@@ -242,3 +259,11 @@ void Out<NCloud::NProto::EStatsFetcherType>(
242259
out << NCloud::NProto::EStatsFetcherType_Name(
243260
statsFetcherType);
244261
}
262+
263+
template <>
264+
void Out<NCloud::NProto::ETimeHistogramUnits>(
265+
IOutputStream& out,
266+
NCloud::NProto::ETimeHistogramUnits timeHistogramUnits)
267+
{
268+
out << NCloud::NProto::ETimeHistogramUnits_Name(timeHistogramUnits);
269+
}

cloud/filestore/libs/diagnostics/config.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ class TDiagnosticsConfig
137137
TFileSystemPerformanceProfile GetHDDFileSystemPerformanceProfile() const;
138138
TFileSystemPerformanceProfile GetSSDFileSystemPerformanceProfile() const;
139139

140+
NCloud::NProto::ETimeHistogramUnits GetServerTimeHistogramUnits() const;
141+
140142
NCloud::NProto::EStatsFetcherType GetStatsFetcherType() const;
141143

142144
ui64 GetProfileLogMaxFlushRecords() const;

cloud/storage/core/libs/diagnostics/histogram_counter_options.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ namespace NCloud {
77
// Options controlling how histogram counters are reported. Every enumerator
// occupies its own bit, so several options can be combined into one
// EHistogramCounterOptions value.
enum class EHistogramCounterOption {
    ReportSingleCounter = 1 << 0,
    ReportMultipleCounters = 1 << 1,

    // Report time histograms in milliseconds.
    UseMsUnitsForTimeHistogram = 1 << 2,

    // Report time histograms in microseconds.
    UseUsUnitsForTimeHistogram = 1 << 3,
};
1115

1216
Y_DECLARE_FLAGS(EHistogramCounterOptions, EHistogramCounterOption);
1317
Y_DECLARE_OPERATORS_FOR_FLAGS(EHistogramCounterOptions);
1418

1519
} // namespace NCloud
16-

cloud/storage/core/libs/diagnostics/histogram_types.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ struct TRequestMsTimeBuckets
7373
{
7474
std::array<double, BUCKETS_COUNT> result;
7575
for (size_t i = 0; i + 1 < array.size(); ++i) {
76-
result[i] = array[i] / 1000;
76+
result[i] = array[i] / 1000.;
7777
}
7878
result.back() = std::numeric_limits<double>::max();
7979
return result;
@@ -82,7 +82,7 @@ struct TRequestMsTimeBuckets
8282
static constexpr std::array<double, BUCKETS_COUNT> Buckets =
8383
MakeArray(TRequestUsTimeBuckets::Buckets);
8484

85-
static constexpr TStringBuf Units = "msec";
85+
static constexpr TStringBuf Units = "";
8686

8787
static TVector<TString> MakeNames();
8888
};

cloud/storage/core/libs/diagnostics/request_counters.cpp

Lines changed: 106 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,7 @@ struct THistBase
115115

116116
////////////////////////////////////////////////////////////////////////////////
117117

118-
struct TTimeHist
119-
: public THistBase<TRequestUsTimeBuckets>
118+
struct TUsTimeHist: public THistBase<TRequestUsTimeBuckets>
120119
{
121120
using THistBase::THistBase;
122121

@@ -128,6 +127,101 @@ struct TTimeHist
128127

129128
////////////////////////////////////////////////////////////////////////////////
130129

130+
// Request-time histogram built on TRequestMsTimeBuckets.
struct TMsTimeHist: public THistBase<TRequestMsTimeBuckets>
{
    using THistBase::THistBase;

    // Records `count` observations of `requestTime`. The duration is taken in
    // microseconds and divided by 1000. (floating point), so sub-millisecond
    // precision is kept when the value is handed to the base histogram.
    void Increment(TDuration requestTime, ui64 count = 1)
    {
        const auto requestTimeMs = requestTime.MicroSeconds() / 1000.;
        THistBase::Increment(requestTimeMs, count);
    }
};
};
139+
140+
////////////////////////////////////////////////////////////////////////////////
141+
142+
// Allows to use both microseconds and milliseconds for time histograms.
143+
class TCompoundTimeHist
144+
{
145+
private:
146+
std::unique_ptr<TUsTimeHist> UsTimeHist;
147+
std::unique_ptr<TMsTimeHist> MsTimeHist;
148+
149+
public:
150+
TCompoundTimeHist(
151+
const TString& name,
152+
EHistogramCounterOptions counterOptions)
153+
{
154+
const bool useMsUnitsForTimeHistogram =
155+
counterOptions &
156+
EHistogramCounterOption::UseMsUnitsForTimeHistogram;
157+
const bool useUsUnitsForTimeHistogram =
158+
counterOptions &
159+
EHistogramCounterOption::UseUsUnitsForTimeHistogram;
160+
Y_ABORT_UNLESS(
161+
useMsUnitsForTimeHistogram || useUsUnitsForTimeHistogram,
162+
"At least one of the units must be enabled");
163+
if (useMsUnitsForTimeHistogram) {
164+
MsTimeHist = std::make_unique<TMsTimeHist>(name, counterOptions);
165+
}
166+
if (useUsUnitsForTimeHistogram) {
167+
UsTimeHist = std::make_unique<TUsTimeHist>(name, counterOptions);
168+
}
169+
}
170+
~TCompoundTimeHist() = default;
171+
172+
TCompoundTimeHist(const TCompoundTimeHist&) = delete;
173+
TCompoundTimeHist(TCompoundTimeHist&&) = default;
174+
TCompoundTimeHist& operator=(const TCompoundTimeHist&) = delete;
175+
TCompoundTimeHist& operator=(TCompoundTimeHist&&) = default;
176+
177+
void Increment(TDuration requestTime, ui64 count = 1)
178+
{
179+
if (MsTimeHist) {
180+
MsTimeHist->Increment(requestTime, count);
181+
}
182+
if (UsTimeHist) {
183+
UsTimeHist->Increment(requestTime, count);
184+
}
185+
}
186+
187+
template <typename... Args>
188+
void Register(Args&&... args)
189+
{
190+
if (MsTimeHist) {
191+
MsTimeHist->Register(args...);
192+
}
193+
if (UsTimeHist) {
194+
UsTimeHist->Register(std::forward<Args>(args)...);
195+
}
196+
}
197+
198+
[[nodiscard]] TVector<TBucketInfo> GetBuckets() const
199+
{
200+
if (MsTimeHist) {
201+
return MsTimeHist->GetBuckets();
202+
}
203+
return UsTimeHist->GetBuckets();
204+
}
205+
206+
[[nodiscard]] const TString& GetUnits() const
207+
{
208+
if (MsTimeHist) {
209+
return MsTimeHist->GetUnits();
210+
}
211+
return UsTimeHist->GetUnits();
212+
}
213+
214+
[[nodiscard]] const TString& GetName() const
215+
{
216+
if (MsTimeHist) {
217+
return MsTimeHist->GetName();
218+
}
219+
return UsTimeHist->GetName();
220+
}
221+
};
222+
223+
////////////////////////////////////////////////////////////////////////////////
224+
131225
struct TSizeHist
132226
: public THistBase<TKbSizeBuckets>
133227
{
@@ -289,19 +383,19 @@ struct TRequestCounters::TStatCounters
289383
TSizeHist SizeHist;
290384
TRequestPercentiles<TSizeHist> SizePercentiles;
291385

292-
TTimeHist TimeHist;
293-
TTimeHist TimeHistUnaligned;
294-
TRequestPercentiles<TTimeHist> TimePercentiles;
386+
TCompoundTimeHist TimeHist;
387+
TCompoundTimeHist TimeHistUnaligned;
388+
TRequestPercentiles<TCompoundTimeHist> TimePercentiles;
295389

296-
TTimeHist ExecutionTimeHist;
297-
TTimeHist ExecutionTimeHistUnaligned;
298-
TRequestPercentiles<TTimeHist> ExecutionTimePercentiles;
390+
TCompoundTimeHist ExecutionTimeHist;
391+
TCompoundTimeHist ExecutionTimeHistUnaligned;
392+
TRequestPercentiles<TCompoundTimeHist> ExecutionTimePercentiles;
299393

300-
TTimeHist RequestCompletionTimeHist;
301-
TRequestPercentiles<TTimeHist> RequestCompletionTimePercentiles;
394+
TCompoundTimeHist RequestCompletionTimeHist;
395+
TRequestPercentiles<TCompoundTimeHist> RequestCompletionTimePercentiles;
302396

303-
TTimeHist PostponedTimeHist;
304-
TRequestPercentiles<TTimeHist> PostponedTimePercentiles;
397+
TCompoundTimeHist PostponedTimeHist;
398+
TRequestPercentiles<TCompoundTimeHist> PostponedTimePercentiles;
305399

306400
TMaxCalculator<DEFAULT_BUCKET_COUNT> MaxTimeCalc;
307401
TMaxCalculator<DEFAULT_BUCKET_COUNT> MaxTotalTimeCalc;

cloud/storage/core/libs/diagnostics/request_counters_ut.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ auto IsReadWriteRequest(TRequestCounters::TRequestType t)
102102
auto MakeRequestCounters(
103103
TRequestCounters::EOption options = {},
104104
EHistogramCounterOptions histogramCounterOptions =
105-
EHistogramCounterOption::ReportMultipleCounters)
105+
EHistogramCounterOption::ReportMultipleCounters |
106+
EHistogramCounterOption::UseUsUnitsForTimeHistogram)
106107
{
107108
return TRequestCounters(
108109
CreateWallClockTimer(),
@@ -116,7 +117,8 @@ auto MakeRequestCounters(
116117
auto MakeRequestCountersPtr(
117118
TRequestCounters::EOption options = {},
118119
EHistogramCounterOptions histogramCounterOptions =
119-
EHistogramCounterOption::ReportMultipleCounters)
120+
EHistogramCounterOption::ReportMultipleCounters |
121+
EHistogramCounterOption::UseUsUnitsForTimeHistogram)
120122
{
121123
return std::make_shared<TRequestCounters>(
122124
CreateWallClockTimer(),
@@ -707,8 +709,7 @@ Y_UNIT_TEST_SUITE(TRequestCountersTest)
707709
{
708710
auto monitoring = CreateMonitoringServiceStub();
709711
auto counters = MakeRequestCountersPtr(
710-
TRequestCounters::EOption::ReportDataPlaneHistogram,
711-
EHistogramCounterOption::ReportMultipleCounters);
712+
TRequestCounters::EOption::ReportDataPlaneHistogram);
712713
counters->Register(*monitoring->GetCounters());
713714

714715
AddRequestStats(*counters, WriteRequestType, {
@@ -747,7 +748,8 @@ Y_UNIT_TEST_SUITE(TRequestCountersTest)
747748
auto monitoring = CreateMonitoringServiceStub();
748749
auto counters = MakeRequestCountersPtr(
749750
TRequestCounters::EOption::ReportDataPlaneHistogram,
750-
EHistogramCounterOption::ReportSingleCounter);
751+
EHistogramCounterOption::ReportSingleCounter |
752+
EHistogramCounterOption::UseUsUnitsForTimeHistogram);
751753
counters->Register(*monitoring->GetCounters());
752754

753755
AddRequestStats(*counters, WriteRequestType, {
@@ -789,7 +791,7 @@ Y_UNIT_TEST_SUITE(TRequestCountersTest)
789791
auto monitoring = CreateMonitoringServiceStub();
790792
auto counters = MakeRequestCountersPtr(
791793
TRequestCounters::EOption::ReportDataPlaneHistogram,
792-
{});
794+
EHistogramCounterOption::UseUsUnitsForTimeHistogram);
793795
counters->Register(*monitoring->GetCounters());
794796

795797
AddRequestStats(*counters, WriteRequestType, {

0 commit comments

Comments
 (0)