Skip to content

Commit 90a2811

Browse files
committed
Add more metrics
1 parent b54b47c commit 90a2811

File tree

7 files changed

+158
-48
lines changed

7 files changed

+158
-48
lines changed

src/ServiceControl.Audit/Auditing/AuditIngestion.cs

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
{
33
using System;
44
using System.Collections.Generic;
5-
using System.Diagnostics.Metrics;
65
using System.Threading;
76
using System.Threading.Channels;
87
using System.Threading.Tasks;
98
using Infrastructure.Settings;
9+
using Metrics;
1010
using Microsoft.Extensions.Hosting;
1111
using NServiceBus;
1212
using NServiceBus.Logging;
@@ -26,7 +26,8 @@ public AuditIngestion(
2626
AuditIngestionCustomCheck.State ingestionState,
2727
AuditIngestor auditIngestor,
2828
IAuditIngestionUnitOfWorkFactory unitOfWorkFactory,
29-
IHostApplicationLifetime applicationLifetime)
29+
IHostApplicationLifetime applicationLifetime,
30+
AuditIngestionMetrics metrics)
3031
{
3132
inputEndpoint = settings.AuditQueue;
3233
this.transportCustomization = transportCustomization;
@@ -35,13 +36,16 @@ public AuditIngestion(
3536
this.unitOfWorkFactory = unitOfWorkFactory;
3637
this.settings = settings;
3738
this.applicationLifetime = applicationLifetime;
39+
this.metrics = metrics;
3840

3941
if (!transportSettings.MaxConcurrency.HasValue)
4042
{
4143
throw new ArgumentException("MaxConcurrency is not set in TransportSettings");
4244
}
4345

44-
channel = Channel.CreateBounded<MessageContext>(new BoundedChannelOptions(transportSettings.MaxConcurrency.Value)
46+
MaxBatchSize = transportSettings.MaxConcurrency.Value;
47+
48+
channel = Channel.CreateBounded<MessageContext>(new BoundedChannelOptions(MaxBatchSize)
4549
{
4650
SingleReader = true,
4751
SingleWriter = false,
@@ -190,22 +194,21 @@ async Task EnsureStopped(CancellationToken cancellationToken)
190194

191195
async Task OnMessage(MessageContext messageContext, CancellationToken cancellationToken)
192196
{
193-
var tags = Telemetry.GetIngestedMessageTags(messageContext.Headers, messageContext.Body);
194-
using (new DurationRecorder(ingestionDuration, tags))
197+
using var messageIngestionMetrics = metrics.BeginIngestion(messageContext);
198+
199+
if (settings.MessageFilter != null && settings.MessageFilter(messageContext))
195200
{
196-
if (settings.MessageFilter != null && settings.MessageFilter(messageContext))
197-
{
198-
return;
199-
}
201+
messageIngestionMetrics.Skipped();
202+
return;
203+
}
200204

201-
var taskCompletionSource = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
202-
messageContext.SetTaskCompletionSource(taskCompletionSource);
205+
var taskCompletionSource = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
206+
messageContext.SetTaskCompletionSource(taskCompletionSource);
203207

204-
await channel.Writer.WriteAsync(messageContext, cancellationToken);
205-
await taskCompletionSource.Task;
208+
await channel.Writer.WriteAsync(messageContext, cancellationToken);
209+
_ = await taskCompletionSource.Task;
206210

207-
successfulMessagesCounter.Add(1, tags);
208-
}
211+
messageIngestionMetrics.Success();
209212
}
210213

211214
public override async Task StartAsync(CancellationToken cancellationToken)
@@ -218,27 +221,27 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
218221
{
219222
try
220223
{
221-
var contexts = new List<MessageContext>(transportSettings.MaxConcurrency.Value);
224+
var contexts = new List<MessageContext>(MaxBatchSize);
222225

223226
while (await channel.Reader.WaitToReadAsync(stoppingToken))
224227
{
225228
// will only enter here if there is something to read.
226229
try
227230
{
228231
// as long as there is something to read this will fetch up to MaximumConcurrency items
229-
using (var recorder = new DurationRecorder(batchDuration))
232+
using (var batchMetrics = metrics.BeginBatch(MaxBatchSize))
230233
{
231234
while (channel.Reader.TryRead(out var context))
232235
{
233236
contexts.Add(context);
234237
}
235238

236-
recorder.Tags.Add("ingestion.batch_size", contexts.Count);
237-
238239
await auditIngestor.Ingest(contexts);
240+
241+
batchMetrics.Complete(contexts.Count);
239242
}
240243

241-
consecutiveBatchFailuresCounter.Record(0);
244+
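// The consecutive batch failure count is reset by batchMetrics when it is disposed after Complete was called with a positive batch size.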
242245
}
243246
catch (Exception e)
244247
{
@@ -257,7 +260,7 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
257260
logger.Info("Ingesting messages failed", e);
258261

259262
// no need to do interlocked increment since this is running sequential
260-
consecutiveBatchFailuresCounter.Record(consecutiveBatchFailures++);
263+
// The consecutive batch failure count is incremented by batchMetrics when it is disposed without Complete having been called.
261264
}
262265
finally
263266
{
@@ -297,9 +300,9 @@ public override async Task StopAsync(CancellationToken cancellationToken)
297300
}
298301

299302
TransportInfrastructure transportInfrastructure;
300-
IMessageReceiver messageReceiver;
301-
long consecutiveBatchFailures = 0;
303+
IMessageReceiver queueIngestor;
302304

305+
readonly int MaxBatchSize;
303306
readonly SemaphoreSlim startStopSemaphore = new(1);
304307
readonly string inputEndpoint;
305308
readonly ITransportCustomization transportCustomization;
@@ -309,12 +312,9 @@ public override async Task StopAsync(CancellationToken cancellationToken)
309312
readonly IAuditIngestionUnitOfWorkFactory unitOfWorkFactory;
310313
readonly Settings settings;
311314
readonly Channel<MessageContext> channel;
312-
readonly Histogram<double> batchDuration = Telemetry.Meter.CreateHistogram<double>(Telemetry.CreateInstrumentName("ingestion", "batch_duration"), unit: "ms", "Average audit message batch processing duration");
313-
readonly Counter<long> successfulMessagesCounter = Telemetry.Meter.CreateCounter<long>(Telemetry.CreateInstrumentName("ingestion", "success"), description: "Successful ingested audit message count");
314-
readonly Histogram<long> consecutiveBatchFailuresCounter = Telemetry.Meter.CreateHistogram<long>(Telemetry.CreateInstrumentName("ingestion", "consecutive_batch_failures"), unit: "count", description: "Consecutive audit ingestion batch failure");
315-
readonly Histogram<double> ingestionDuration = Telemetry.Meter.CreateHistogram<double>(Telemetry.CreateInstrumentName("ingestion", "duration"), unit: "ms", description: "Average incoming audit message processing duration");
316315
readonly Watchdog watchdog;
317316
readonly IHostApplicationLifetime applicationLifetime;
317+
readonly AuditIngestionMetrics metrics;
318318

319319
static readonly ILog logger = LogManager.GetLogger<AuditIngestion>();
320320

src/ServiceControl.Audit/Auditing/AuditIngestionMetrics.cs

Lines changed: 0 additions & 21 deletions
This file was deleted.

src/ServiceControl.Audit/Auditing/AuditIngestor.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Linq;
66
using System.Threading.Tasks;
77
using Infrastructure.Settings;
8+
using Metrics;
89
using Monitoring;
910
using NServiceBus;
1011
using NServiceBus.Logging;
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
namespace ServiceControl.Audit.Auditing.Metrics;
2+
3+
using System.Diagnostics.Metrics;
4+
using NServiceBus.Transport;
5+
6+
/// <summary>
/// Creates and owns the instruments of the audit ingestion meter and hands out
/// per-message and per-batch recorders that report into them.
/// </summary>
public class AuditIngestionMetrics
{
    /// <summary>
    /// Creates the meter through <paramref name="meterFactory"/> and registers the
    /// forwarded counter, the batch and message duration histograms and the
    /// consecutive batch failure gauge on it.
    /// </summary>
    /// <param name="meterFactory">Factory used to create the meter.</param>
    public AuditIngestionMetrics(IMeterFactory meterFactory)
    {
        var meter = meterFactory.Create(MeterName, MeterVersion);

        forwardedMessagesCounter = meter.CreateCounter<long>(CreateInstrumentName("forwarded"), description: "Audit ingestion forwarded message count");
        batchDuration = meter.CreateHistogram<double>(CreateInstrumentName("batch_duration"), unit: "ms", "Average audit message batch processing duration");
        // The gauge callback is invoked by whoever collects the metrics, not by the code that
        // updates the count, so the value is read atomically.
        consecutiveBatchFailureGauge = meter.CreateObservableGauge(CreateInstrumentName("consecutive_batch_failures"), () => System.Threading.Interlocked.Read(ref consecutiveBatchFailures), unit: "count", description: "Consecutive audit ingestion batch failure");
        ingestionDuration = meter.CreateHistogram<double>(CreateInstrumentName("duration"), unit: "ms", description: "Average incoming audit message processing duration");
    }

    /// <summary>Adds <paramref name="count"/> to the forwarded message counter.</summary>
    public void IncrementMessagesForwarded(int count) => forwardedMessagesCounter.Add(count);

    /// <summary>
    /// Creates a recorder for a single incoming message that reports into the ingestion
    /// duration histogram.
    /// </summary>
    public MessageIngestionMetrics BeginIngestion(MessageContext messageContext) => new(messageContext, ingestionDuration);

    /// <summary>
    /// Creates a recorder for a single batch that reports into the batch duration histogram
    /// and feeds its outcome back into the consecutive batch failure count.
    /// </summary>
    public BatchMetrics BeginBatch(int maxBatchSize) => new(maxBatchSize, batchDuration, RecordBatchOutcome);

    // A successful batch resets the consecutive failure count; a failed one increments it.
    void RecordBatchOutcome(bool success)
    {
        if (success)
        {
            System.Threading.Interlocked.Exchange(ref consecutiveBatchFailures, 0);
        }
        else
        {
            System.Threading.Interlocked.Increment(ref consecutiveBatchFailures);
        }
    }

    // Instrument names are machine-readable identifiers, so lower-casing must not depend on the current culture.
    static string CreateInstrumentName(string instrumentName) => $"sc.audit.ingestion.{instrumentName}".ToLowerInvariant();

    long consecutiveBatchFailures;

    readonly Counter<long> forwardedMessagesCounter;
    readonly Histogram<double> batchDuration;
#pragma warning disable IDE0052
    readonly ObservableGauge<long> consecutiveBatchFailureGauge;
#pragma warning restore IDE0052
    readonly Histogram<double> ingestionDuration;

    const string MeterName = "Particular.ServiceControl.Audit";
    const string MeterVersion = "0.1.0";
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
namespace ServiceControl.Audit.Auditing.Metrics;
2+
3+
using System;
4+
using System.Diagnostics;
5+
using System.Diagnostics.Metrics;
6+
7+
/// <summary>
/// Times a single ingestion batch. The stopwatch starts when the instance is created;
/// on dispose the elapsed time is recorded in <c>BatchDuration</c> with a <c>result</c> tag
/// and the outcome is reported through <c>IsSuccess</c>.
/// </summary>
/// <param name="MaxBatchSize">Batch size that counts as a "full" batch.</param>
/// <param name="BatchDuration">Histogram that receives the elapsed time in milliseconds.</param>
/// <param name="IsSuccess">Callback invoked on dispose with <c>true</c> when the batch completed, otherwise <c>false</c>.</param>
public record BatchMetrics(int MaxBatchSize, Histogram<double> BatchDuration, Action<bool> IsSuccess) : IDisposable
{
    /// <summary>
    /// Records the batch duration and reports the outcome. The batch counts as failed unless
    /// <see cref="Complete"/> was called with a positive size. Calls after the first are ignored.
    /// </summary>
    public void Dispose()
    {
        // Dispose must be safe to call more than once; only the first call records a measurement.
        if (disposed)
        {
            return;
        }

        disposed = true;

        string result;

        if (actualBatchSize <= 0)
        {
            result = "failed";
            IsSuccess(false);
        }
        else
        {
            result = actualBatchSize == MaxBatchSize ? "full" : "partial";

            IsSuccess(true);
        }

        var tags = new TagList();
        tags.Add("result", result);

        // TotalMilliseconds keeps the fractional part; ElapsedMilliseconds truncates to whole milliseconds.
        BatchDuration.Record(sw.Elapsed.TotalMilliseconds, tags);
    }

    /// <summary>Marks the batch as completed with <paramref name="size"/> messages.</summary>
    public void Complete(int size) => actualBatchSize = size;

    // -1 means Complete was never called.
    int actualBatchSize = -1;
    bool disposed;
    readonly Stopwatch sw = Stopwatch.StartNew();
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
namespace ServiceControl.Audit.Auditing.Metrics;
2+
3+
using System;
4+
using System.Diagnostics;
5+
using System.Diagnostics.Metrics;
6+
using EndpointPlugin.Messages.SagaState;
7+
using NServiceBus;
8+
using NServiceBus.Transport;
9+
10+
/// <summary>
/// Times the ingestion of a single message. The stopwatch starts when the instance is
/// created; on dispose the elapsed time is recorded in <c>Duration</c>, tagged with the
/// message category and the result. The result is "failed" unless <see cref="Skipped"/>
/// or <see cref="Success"/> was called.
/// </summary>
/// <param name="Message">The message being ingested; its headers determine the category tag.</param>
/// <param name="Duration">Histogram that receives the elapsed time in milliseconds.</param>
public record MessageIngestionMetrics(MessageContext Message, Histogram<double> Duration) : IDisposable
{
    /// <summary>Marks the message as skipped.</summary>
    public void Skipped() => result = "skipped";

    /// <summary>Marks the message as successfully ingested.</summary>
    public void Success() => result = "success";

    /// <summary>
    /// Records the elapsed time with the category and result tags. Calls after the first are ignored.
    /// </summary>
    public void Dispose()
    {
        // Dispose must be safe to call more than once; only the first call records a measurement.
        if (disposed)
        {
            return;
        }

        disposed = true;

        var tags = GetTags(Message);

        tags.Add("result", result);

        // TotalMilliseconds keeps the fractional part; ElapsedMilliseconds truncates to whole milliseconds.
        Duration.Record(sw.Elapsed.TotalMilliseconds, tags);
    }

    // Builds the "message.category" tag: messages without an enclosed message types header are
    // tagged as control messages, saga updates are distinguished from ordinary audit messages.
    static TagList GetTags(MessageContext messageContext)
    {
        var tags = new TagList();

        if (messageContext.Headers.TryGetValue(Headers.EnclosedMessageTypes, out var messageType))
        {
            // NOTE(review): exact string match against the type's FullName — confirm the header never
            // carries an assembly-qualified name or several type names for saga update messages.
            tags.Add("message.category", messageType == SagaUpdateMessageType ? "saga-update" : "audit-message");
        }
        else
        {
            tags.Add("message.category", "control-message");
        }

        return tags;
    }

    string result = "failed";
    bool disposed;

    readonly Stopwatch sw = Stopwatch.StartNew();

    static readonly string SagaUpdateMessageType = typeof(SagaUpdatedMessage).FullName;
}

src/ServiceControl.Audit/HostApplicationBuilderExtensions.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ namespace ServiceControl.Audit;
66
using System.Threading.Tasks;
77
using Auditing;
88
using Hosting;
9+
using Auditing.Metrics;
910
using Infrastructure;
1011
using Infrastructure.Settings;
1112
using Microsoft.AspNetCore.HttpLogging;

0 commit comments

Comments
 (0)