Skip to content

Commit 0e7962b

Browse files
committed
Move a few instruments to be attributes instead
1 parent 6932fb3 commit 0e7962b

File tree

7 files changed

+39
-32
lines changed

7 files changed

+39
-32
lines changed

docs/telemetry.md

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,28 @@ The following metrics are available:
1818

1919
### Ingestion
2020

21-
- `sc.audit.ingestion.success` - Successful ingested audit message count
22-
- `sc.audit.ingestion.retry` - Retried audit message count
23-
- `sc.audit.ingestion.failed` - Failed audit message count
24-
- `sc.audit.ingestion.duration` - Audit message processing duration (in milliseconds)
25-
- `sc.audit.ingestion.message_size` - Audit message body size (in kilobytes)
26-
- `sc.audit.ingestion.forwarded` - Forwarded audit messages count
21+
#### Success or failure
22+
23+
- `sc.audit.ingestion.success` - Successfully ingested audit message count (Counter)
24+
- `sc.audit.ingestion.retry` - Retried audit message count (Counter)
25+
- `sc.audit.ingestion.failed` - Failed audit message count (Counter)
26+
27+
The above metrics also have the following attributes attached:
28+
29+
- `messaging.message.body.size` - The size of the message body in bytes
30+
- `messaging.message.type` - The logical message type of the message if present
31+
32+
#### Details
33+
34+
- `sc.audit.ingestion.duration` - Audit message processing duration in milliseconds (Histogram)
35+
- `sc.audit.ingestion.forwarded` - Number of forwarded audit messages, recorded when forwarding is enabled (Counter)
2736

2837
### Batching
2938

30-
- `sc.audit.ingestion.batch_duration` - Batch processing duration (in milliseconds)
31-
- `sc.audit.ingestion.batch_size` - Batch size (number of messages)
32-
- `sc.audit.ingestion.consecutive_batch_failures` - Consecutive batch failures
39+
- `sc.audit.ingestion.batch_duration` - Batch processing duration in milliseconds (Histogram)
40+
- Attributes:
41+
- `ingestion.batch_size` - The number of messages in the batch
42+
- `sc.audit.ingestion.consecutive_batch_failures` - Consecutive batch failures (Histogram)
3343

3444
## Monitoring
3545

src/ServiceControl.Audit/Auditing/AuditIngestion.cs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ async Task EnsureStopped(CancellationToken cancellationToken = default)
174174

175175
async Task OnMessage(MessageContext messageContext, CancellationToken cancellationToken)
176176
{
177-
using (new DurationRecorder(ingestionDuration))
177+
var tags = Telemetry.GetIngestedMessageTags(messageContext.Headers, messageContext.Body);
178+
using (new DurationRecorder(ingestionDuration, tags))
178179
{
179180
if (settings.MessageFilter != null && settings.MessageFilter(messageContext))
180181
{
@@ -187,8 +188,7 @@ async Task OnMessage(MessageContext messageContext, CancellationToken cancellati
187188
await channel.Writer.WriteAsync(messageContext, cancellationToken);
188189
await taskCompletionSource.Task;
189190

190-
successfulMessagesCounter.Add(1, Telemetry.GetIngestedMessageTags(messageContext.Headers));
191-
messageSize.Record(messageContext.Body.Length / 1024.0);
191+
successfulMessagesCounter.Add(1, tags);
192192
}
193193
}
194194

@@ -210,14 +210,14 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
210210
try
211211
{
212212
// as long as there is something to read this will fetch up to MaximumConcurrency items
213-
using (new DurationRecorder(auditBatchDuration))
213+
using (var recorder = new DurationRecorder(batchDuration))
214214
{
215215
while (channel.Reader.TryRead(out var context))
216216
{
217217
contexts.Add(context);
218218
}
219219

220-
auditBatchSize.Record(contexts.Count);
220+
recorder.Tags.Add("ingestion.batch_size", contexts.Count);
221221

222222
await auditIngestor.Ingest(contexts);
223223
}
@@ -293,9 +293,7 @@ public override async Task StopAsync(CancellationToken cancellationToken)
293293
readonly IAuditIngestionUnitOfWorkFactory unitOfWorkFactory;
294294
readonly Settings settings;
295295
readonly Channel<MessageContext> channel;
296-
readonly Histogram<long> auditBatchSize = Telemetry.Meter.CreateHistogram<long>(Telemetry.CreateInstrumentName("ingestion", "batch_size"), description: "Audit ingestion average batch size");
297-
readonly Histogram<double> auditBatchDuration = Telemetry.Meter.CreateHistogram<double>(Telemetry.CreateInstrumentName("ingestion", "batch_duration"), unit: "ms", "Average audit message batch processing duration");
298-
readonly Histogram<double> messageSize = Telemetry.Meter.CreateHistogram<double>(Telemetry.CreateInstrumentName("ingestion", "message_size"), unit: "kilobytes", description: "Average audit message body size");
296+
readonly Histogram<double> batchDuration = Telemetry.Meter.CreateHistogram<double>(Telemetry.CreateInstrumentName("ingestion", "batch_duration"), unit: "ms", "Average audit message batch processing duration");
299297
readonly Counter<long> successfulMessagesCounter = Telemetry.Meter.CreateCounter<long>(Telemetry.CreateInstrumentName("ingestion", "success"), description: "Successful ingested audit message count");
300298
readonly Histogram<long> consecutiveBatchFailuresCounter = Telemetry.Meter.CreateHistogram<long>(Telemetry.CreateInstrumentName("ingestion", "consecutive_batch_failures"), unit: "count", description: "Consecutive audit ingestion batch failure");
301299
readonly Histogram<double> ingestionDuration = Telemetry.Meter.CreateHistogram<double>(Telemetry.CreateInstrumentName("ingestion", "duration"), unit: "ms", description: "Average incoming audit message processing duration");

src/ServiceControl.Audit/Auditing/AuditIngestionFaultPolicy.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,10 @@
99
using System.Threading;
1010
using System.Threading.Tasks;
1111
using Infrastructure;
12-
using NServiceBus;
1312
using NServiceBus.Logging;
1413
using NServiceBus.Transport;
15-
using ServiceControl.Audit.Persistence;
16-
using ServiceControl.Configuration;
14+
using Persistence;
15+
using Configuration;
1716
using ServiceControl.Infrastructure;
1817

1918
class AuditIngestionFaultPolicy
@@ -36,7 +35,7 @@ public AuditIngestionFaultPolicy(IFailedAuditStorage failedAuditStorage, Logging
3635

3736
public async Task<ErrorHandleResult> OnError(ErrorContext errorContext, CancellationToken cancellationToken = default)
3837
{
39-
var tags = Telemetry.GetIngestedMessageTags(errorContext.Message.Headers);
38+
var tags = Telemetry.GetIngestedMessageTags(errorContext.Message.Headers, errorContext.Message.Body);
4039

4140
//Same as recoverability policy in NServiceBusFactory
4241
if (errorContext.ImmediateProcessingFailures < 3)

src/ServiceControl.Audit/Auditing/AuditIngestor.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
using Persistence.UnitOfWork;
1515
using Recoverability;
1616
using SagaAudit;
17+
using ServiceControl.Infrastructure;
1718
using ServiceControl.Transports;
1819

1920
public class AuditIngestor

src/ServiceControl.Audit/HostApplicationBuilderExtensions.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ namespace ServiceControl.Audit;
1919
using NServiceBus.Transport;
2020
using Persistence;
2121
using Transports;
22+
using ServiceControl.Infrastructure;
2223
using OpenTelemetry.Metrics;
2324
using OpenTelemetry.Resources;
2425

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
namespace ServiceControl.Audit;
1+
namespace ServiceControl.Infrastructure;
22

33
using System;
44
using System.Diagnostics;
55
using System.Diagnostics.Metrics;
66

7-
record DurationRecorder(Histogram<double> Histogram) : IDisposable
7+
record DurationRecorder(Histogram<double> Histogram, TagList Tags = default) : IDisposable
88
{
99
readonly Stopwatch sw = Stopwatch.StartNew();
1010

11-
public void Dispose() => Histogram.Record(sw.ElapsedMilliseconds);
11+
public void Dispose() => Histogram.Record(sw.ElapsedMilliseconds, Tags);
1212
}

src/ServiceControl.Audit/Infrastructure/Telemetry.cs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
namespace ServiceControl.Audit;
1+
namespace ServiceControl.Infrastructure;
22

3+
using System;
34
using System.Collections.Generic;
45
using System.Diagnostics;
56
using System.Diagnostics.Metrics;
@@ -13,18 +14,15 @@ static class Telemetry
1314

1415
public static string CreateInstrumentName(string instrumentNamespace, string instrumentName) => $"sc.audit.{instrumentNamespace}.{instrumentName}".ToLower();
1516

16-
public static void AddAuditIngestionMeters(this MeterProviderBuilder builder)
17-
{
18-
builder.AddMeter(MeterName);
19-
}
17+
public static void AddAuditIngestionMeters(this MeterProviderBuilder builder) => builder.AddMeter(MeterName);
2018

21-
public static TagList GetIngestedMessageTags(IDictionary<string, string> headers)
19+
public static TagList GetIngestedMessageTags(IDictionary<string, string> headers, ReadOnlyMemory<byte> body)
2220
{
23-
var tags = new TagList();
21+
var tags = new TagList { { "messaging.message.body.size", body.Length } };
2422

2523
if (headers.TryGetValue(Headers.EnclosedMessageTypes, out var messageType))
2624
{
27-
tags.Add("nservicebus.message_type", messageType);
25+
tags.Add("messaging.message.type", messageType);
2826
}
2927

3028
return tags;

0 commit comments

Comments
 (0)