@@ -137,7 +137,7 @@ public async Task Run(RunConfig runConfig)
137137 return ;
138138
139139 Task ShootingTask ( RateLimiter rateLimitPolicy , string operationType ,
140- Func < T , RunConfig , Gauge ? , Task < ( int , StatusCode ) > > action )
140+ Func < T , RunConfig , Counter ? , Task < ( int , StatusCode ) > > action )
141141 {
142142 var metricFactory = Metrics . WithLabels ( new Dictionary < string , string >
143143 {
@@ -149,41 +149,61 @@ Task ShootingTask(RateLimiter rateLimitPolicy, string operationType,
149149 }
150150 ) ;
151151
152- var okGauge = metricFactory . CreateCounter ( // Counter
152+ var operationsTotal = metricFactory . CreateCounter (
153+ "sdk_operations_total" ,
154+ "Total number of operations performed by the SDK, categorized by type."
155+ ) ;
156+
157+ var operationsSuccessTotal = metricFactory . CreateCounter (
153158 "sdk_operations_success_total" ,
154159 "Total number of successful operations, categorized by type."
155160 ) ;
156- var notOkGauge = metricFactory . CreateCounter (
161+
162+ var operationsFailureTotal = metricFactory . CreateCounter (
157163 "sdk_operations_failure_total" ,
158164 "Total number of failed operations, categorized by type."
159165 ) ;
160- var latencySummary = metricFactory . CreateSummary (
166+
167+ var operationLatencySeconds = metricFactory . CreateHistogram (
161168 "sdk_operation_latency_seconds" ,
162169 "Latency of operations performed by the SDK in seconds, categorized by type and status." ,
163- new [ ] { "status" } ,
164- new SummaryConfiguration // Гистограмма
170+ [ "operation_status" ] ,
171+ new HistogramConfiguration
165172 {
166- MaxAge = TimeSpan . FromSeconds ( 15 ) ,
167- Objectives = new QuantileEpsilonPair [ ]
168- {
169- new ( 0.5 , 0.05 ) ,
170- new ( 0.99 , 0.005 ) ,
171- new ( 0.999 , 0.0005 )
172- }
173- } ) ;
174- var attemptsHistogram = metricFactory . CreateHistogram (
173+ Buckets =
174+ [
175+ 0.001 , // 1 ms
176+ 0.002 , // 2 ms
177+ 0.003 , // 3 ms
178+ 0.004 , // 4 ms
179+ 0.005 , // 5 ms
180+ 0.0075 , // 7.5 ms
181+ 0.010 , // 10 ms
182+ 0.020 , // 20 ms
183+ 0.050 , // 50 ms
184+ 0.100 , // 100 ms
185+ 0.200 , // 200 ms
186+ 0.500 , // 500 ms
187+ 1.000 // 1 s
188+ ]
189+ }
190+ ) ;
191+
192+ var retryAttempts = metricFactory . CreateGauge (
175193 "sdk_retry_attempts" ,
176- "Current retry attempts, categorized by operation type." ,
177- new [ ] { "status" } ,
178- new HistogramConfiguration { Buckets = Histogram . LinearBuckets ( 1 , 1 , 10 ) }
194+ "Current retry attempts, categorized by operation type."
179195 ) ;
180- var errorsGauge = metricFactory . CreateGauge ( "errors" , "amount of errors" , new [ ] { "class" , "in" } ) ;
181196
182- foreach ( var statusCode in Enum . GetValues < StatusCode > ( ) )
183- {
184- errorsGauge . WithLabels ( statusCode . StatusName ( ) , "retried" ) . IncTo ( 0 ) ;
185- errorsGauge . WithLabels ( statusCode . StatusName ( ) , "finally" ) . IncTo ( 0 ) ;
186- }
197+ var pendingOperations = metricFactory . CreateGauge (
198+ "sdk_pending_operations" ,
199+ "Current number of pending operations, categorized by type."
200+ ) ;
201+
202+ var errorsTotal = metricFactory . CreateCounter (
203+ "sdk_errors_total" ,
204+ "Total number of errors encountered, categorized by error type." ,
205+ [ "error_type" ]
206+ ) ;
187207
188208 // ReSharper disable once MethodSupportsCancellation
189209 return Task . Run ( async ( ) =>
@@ -200,25 +220,26 @@ Task ShootingTask(RateLimiter rateLimitPolicy, string operationType,
200220
_ = Task.Run(async () =>
{
    // Fire-and-forget execution of one operation: runs the action, then records
    // its outcome (attempt count, totals, errors and latency) in the metrics.
    pendingOperations.Inc();
    var sw = Stopwatch.StartNew();

    int attempts;
    StatusCode statusCode;
    try
    {
        (attempts, statusCode) = await action(client, runConfig, errorsTotal);
    }
    finally
    {
        // Runs even when the action throws, so the pending gauge cannot drift upwards.
        sw.Stop();
        pendingOperations.Dec();
    }

    // The latency histogram is bucketed in fractional seconds (0.001 .. 1.0).
    // ElapsedMilliseconds / 1000 is integer division and would truncate every
    // sub-second latency to 0, so take the elapsed time as a double instead.
    var elapsedSeconds = sw.Elapsed.TotalSeconds;

    retryAttempts.Set(attempts);
    operationsTotal.Inc();

    if (statusCode != StatusCode.Success)
    {
        errorsTotal.WithLabels(statusCode.StatusName()).Inc();
        operationsFailureTotal.Inc();
        operationLatencySeconds.WithLabels("err").Observe(elapsedSeconds);
    }
    else
    {
        operationsSuccessTotal.Inc();
        operationLatencySeconds.WithLabels("success").Observe(elapsedSeconds);
    }
}, cancellationTokenSource.Token);
223244 }
224245
@@ -237,12 +258,12 @@ Task ShootingTask(RateLimiter rateLimitPolicy, string operationType,
237258 // return attempt count & StatusCode operation
238259 protected abstract Task < ( int , StatusCode ) > Upsert ( T client , string upsertSql ,
239260 Dictionary < string , YdbValue > parameters ,
240- int writeTimeout , Gauge ? errorsGauge = null ) ;
261+ int writeTimeout , Counter ? errorsTotal = null ) ;
241262
242263 protected abstract Task < ( int , StatusCode , object ? ) > Select ( T client , string selectSql ,
243- Dictionary < string , YdbValue > parameters , int readTimeout , Gauge ? errorsGauge = null ) ;
264+ Dictionary < string , YdbValue > parameters , int readTimeout , Counter ? errorsTotal = null ) ;
244265
245- private Task < ( int , StatusCode ) > Upsert ( T client , Config config , Gauge ? errorsGauge = null )
266+ private Task < ( int , StatusCode ) > Upsert ( T client , Config config , Counter ? errorsTotal = null )
246267 {
247268 const int minSizeStr = 20 ;
248269 const int maxSizeStr = 40 ;
@@ -265,12 +286,12 @@ Task ShootingTask(RateLimiter rateLimitPolicy, string operationType,
265286 } ,
266287 { "$payload_double" , YdbValue . MakeDouble ( Random . Shared . NextDouble ( ) ) } ,
267288 { "$payload_timestamp" , YdbValue . MakeTimestamp ( DateTime . Now ) }
268- } , config . WriteTimeout , errorsGauge ) ;
289+ } , config . WriteTimeout , errorsTotal ) ;
269290 }
270291
271292 protected abstract Task < T > CreateClient ( Config config ) ;
272293
273- private async Task < ( int , StatusCode ) > Select ( T client , RunConfig config , Gauge ? errorsGauge = null )
294+ private async Task < ( int , StatusCode ) > Select ( T client , RunConfig config , Counter ? errorsTotal = null )
274295 {
275296 var ( attempts , code , _) = await Select ( client ,
276297 $ """
@@ -281,7 +302,7 @@ Task ShootingTask(RateLimiter rateLimitPolicy, string operationType,
281302 new Dictionary < string , YdbValue >
282303 {
283304 { "$id" , YdbValue . MakeInt32 ( Random . Shared . Next ( _maxId ) ) }
284- } , config . ReadTimeout , errorsGauge ) ;
305+ } , config . ReadTimeout , errorsTotal ) ;
285306
286307 return ( attempts , code ) ;
287308 }
@@ -294,4 +315,4 @@ public static string StatusName(this StatusCode statusCode)
294315 var prefix = statusCode >= StatusCode . ClientTransportResourceExhausted ? "GRPC" : "YDB" ;
295316 return $ "{ prefix } _{ statusCode } ";
296317 }
297- }
318+ }
0 commit comments