Skip to content
This repository was archived by the owner on Oct 11, 2023. It is now read-only.

Commit b3de8ff

Browse files
peterfelts authored and saixiaohui committed
Adding diagnostics instrumentation around CosmosDB failures and process termination. (#341)
1 parent 7a9dc7f commit b3de8ff

File tree

4 files changed

+64
-24
lines changed

4 files changed

+64
-24
lines changed

Services.Test/Storage/CosmosDbSql/EngineTest.cs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ public class EngineTest
2626

2727
private readonly Mock<IFactory> factory;
2828
private readonly Mock<ILogger> logger;
29+
private readonly Mock<IDiagnosticsLogger> mockDiagnosticsLogger;
2930
private readonly Mock<IInstance> instance;
3031
private readonly Mock<ISDKWrapper> cosmosDbSql;
3132
private readonly Mock<IDocumentClient> cosmosDbSqlClient;
@@ -39,6 +40,7 @@ public EngineTest()
3940
{
4041
this.factory = new Mock<IFactory>();
4142
this.logger = new Mock<ILogger>();
43+
this.mockDiagnosticsLogger = new Mock<IDiagnosticsLogger>();
4244
this.instance = new Mock<IInstance>();
4345

4446
this.storageConfig = new Config { CosmosDbSqlDatabase = "db", CosmosDbSqlCollection = "coll" };
@@ -49,7 +51,7 @@ public EngineTest()
4951
this.cosmosDbSqlClient = new Mock<IDocumentClient>();
5052
this.cosmosDbSql.Setup(x => x.GetClientAsync(this.storageConfig)).ReturnsAsync(this.cosmosDbSqlClient.Object);
5153

52-
this.target = new Engine(this.factory.Object, this.logger.Object, this.instance.Object);
54+
this.target = new Engine(this.factory.Object, this.logger.Object, this.mockDiagnosticsLogger.Object, this.instance.Object);
5355

5456
this.target.Init(this.storageConfig);
5557
this.instance.Invocations.Clear();
@@ -59,7 +61,7 @@ public EngineTest()
5961
public void ItCanBeInitializedOnlyOnce()
6062
{
6163
// Act
62-
var engine = new Engine(this.factory.Object, this.logger.Object, this.instance.Object);
64+
var engine = new Engine(this.factory.Object, this.logger.Object, this.mockDiagnosticsLogger.Object, this.instance.Object);
6365
engine.Init(this.storageConfig);
6466

6567
// Assert

Services.Test/Storage/EnginesTest.cs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,9 @@ public EnginesTest()
2828
// implement the same interface and don't have a parameterless ctor
2929
this.instance = new Mock<IInstance>();
3030
var logger = new Mock<ILogger>();
31+
var mockDiagnosticsLogger = new Mock<IDiagnosticsLogger>();
3132
this.factory.Setup(x => x.Resolve<CosmosDbSqlEngine>())
32-
.Returns(new CosmosDbSqlEngine(this.factory.Object, logger.Object, this.instance.Object));
33+
.Returns(new CosmosDbSqlEngine(this.factory.Object, logger.Object, mockDiagnosticsLogger.Object, this.instance.Object));
3334
this.factory.Setup(x => x.Resolve<TableStorageEngine>())
3435
.Returns(new TableStorageEngine(this.factory.Object, logger.Object, this.instance.Object));
3536
}

Services/Storage/CosmosDbSql/Engine.cs

Lines changed: 43 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
using System;
44
using System.Collections.Generic;
5-
using System.Linq;
65
using System.Net;
76
using System.Threading.Tasks;
87
using Microsoft.Azure.Documents;
@@ -19,6 +18,7 @@ public class Engine : IEngine, IDisposable
1918
private readonly ILogger log;
2019
private readonly IInstance instance;
2120
private readonly IFactory factory;
21+
private readonly IDiagnosticsLogger diagnosticsLogger;
2222

2323
private Config storageConfig;
2424

@@ -34,10 +34,12 @@ public class Engine : IEngine, IDisposable
3434
public Engine(
3535
IFactory factory,
3636
ILogger logger,
37+
IDiagnosticsLogger diagnosticsLogger,
3738
IInstance instance)
3839
{
3940
this.log = logger;
4041
this.instance = instance;
42+
this.diagnosticsLogger = diagnosticsLogger;
4143
this.factory = factory;
4244

4345
this.disposedValue = false;
@@ -118,7 +120,11 @@ public async Task<IEnumerable<IDataRecord>> GetAllAsync()
118120
}
119121
catch (Exception e)
120122
{
121-
this.log.Error("Unexpected error while reading from Cosmos DB SQL", () => new { this.storageName, e });
123+
const string MSG = "Unexpected error while reading from Cosmos DB SQL";
124+
var errorData = new { this.storageName, e };
125+
126+
this.log.Error(MSG, () => errorData);
127+
this.diagnosticsLogger.LogServiceError(MSG, errorData);
122128
throw new ExternalDependencyException(e);
123129
}
124130
}
@@ -146,8 +152,11 @@ public async Task<IDataRecord> CreateAsync(IDataRecord input)
146152
}
147153
catch (Exception e)
148154
{
149-
this.log.Error("Unexpected error while writing to Cosmos DB SQL",
150-
() => new { this.storageName, Id = input.GetId(), e });
155+
const string MSG = "Unexpected error while writing to Cosmos DB SQL";
156+
var errorData = new { this.storageName, Id = input.GetId(), e };
157+
158+
this.log.Error(MSG, () => errorData);
159+
this.diagnosticsLogger.LogServiceError(MSG, errorData);
151160
throw new ExternalDependencyException(e);
152161
}
153162
}
@@ -182,8 +191,11 @@ public async Task<IDataRecord> UpsertAsync(IDataRecord input, string eTag)
182191
}
183192
catch (Exception e)
184193
{
185-
this.log.Error("Unexpected error while writing to Cosmos DB SQL",
186-
() => new { this.storageName, Id = input.GetId(), eTag, e });
194+
const string MSG = "Unexpected error while writing to Cosmos DB SQL";
195+
var errorData = new { this.storageName, Id = input.GetId(), eTag, e };
196+
197+
this.log.Error(MSG, () => errorData);
198+
this.diagnosticsLogger.LogServiceError(MSG, errorData);
187199
throw new ExternalDependencyException(e);
188200
}
189201
}
@@ -207,8 +219,11 @@ public async Task DeleteAsync(string id)
207219
}
208220
catch (Exception e)
209221
{
210-
this.log.Error("Unexpected error while writing to Cosmos DB SQL",
211-
() => new { this.storageName, id, e });
222+
const string MSG = "Unexpected error while writing to Cosmos DB SQL";
223+
var errorData = new { this.storageName, id, e };
224+
225+
this.log.Error(MSG, () => errorData);
226+
this.diagnosticsLogger.LogServiceError(MSG, errorData);
212227
throw new ExternalDependencyException(e);
213228
}
214229
}
@@ -274,8 +289,11 @@ public async Task<bool> TryToLockAsync(
274289
}
275290
catch (Exception e)
276291
{
277-
this.log.Error("Unexpected error while writing to Cosmos DB SQL",
278-
() => new { this.storageName, id, ownerId, ownerType, lockDurationSecs = durationSeconds, e });
292+
const string MSG = "Unexpected error while writing to Cosmos DB SQL";
293+
var errorData = new { this.storageName, id, ownerId, ownerType, lockDurationSecs = durationSeconds, e };
294+
295+
this.log.Error(MSG, () => errorData);
296+
this.diagnosticsLogger.LogServiceError(MSG, errorData);
279297
}
280298

281299
return false;
@@ -321,8 +339,11 @@ public async Task<bool> TryToUnlockAsync(string id, string ownerId, string owner
321339
}
322340
catch (Exception e)
323341
{
324-
this.log.Error("Unexpected error while writing to Cosmos DB SQL",
325-
() => new { this.storageName, id, ownerId, ownerType, e });
342+
const string MSG = "Unexpected error while writing to Cosmos DB SQL";
343+
var errorData = new { this.storageName, id, ownerId, ownerType, e };
344+
345+
this.log.Error(MSG, () => errorData);
346+
this.diagnosticsLogger.LogServiceError(MSG, errorData);
326347
}
327348

328349
return false;
@@ -397,8 +418,11 @@ private async Task SetupStorageAsync()
397418
}
398419
catch (Exception e)
399420
{
400-
this.log.Error("Unexpected error while reading from Cosmos DB SQL",
401-
() => new { this.storageName, id, e });
421+
const string MSG = "Unexpected error while reading from Cosmos DB SQL";
422+
var errorData = new { this.storageName, id, e};
423+
424+
this.log.Error(MSG, () => errorData);
425+
this.diagnosticsLogger.LogServiceError(MSG, errorData);
402426
throw new ExternalDependencyException(e);
403427
}
404428
}
@@ -436,7 +460,11 @@ private async Task TryToDeleteExpiredRecord(string id)
436460
catch (Exception e)
437461
{
438462
// Log and do not throw, we're just trying to delete and will retry automatically later
439-
this.log.Warn("Unexpected error while writing to Cosmos DB SQL", () => new { this.storageName, id, e });
463+
const string MSG = "Unexpected error while writing to Cosmos DB SQL";
464+
var errorData = new { this.storageName, id, e };
465+
466+
this.log.Error(MSG, () => errorData);
467+
this.diagnosticsLogger.LogServiceError(MSG, errorData);
440468
}
441469
}
442470
}

WebService/Startup.cs

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
using Microsoft.AspNetCore.Hosting;
1111
using Microsoft.Azure.IoTSolutions.DeviceSimulation.PartitioningAgent;
1212
using Microsoft.Azure.IoTSolutions.DeviceSimulation.Services;
13+
using Microsoft.Azure.IoTSolutions.DeviceSimulation.Services.Diagnostics;
1314
using Microsoft.Azure.IoTSolutions.DeviceSimulation.SimulationAgent;
1415
using Microsoft.Azure.IoTSolutions.DeviceSimulation.WebService.Auth;
1516
using Microsoft.Azure.IoTSolutions.DeviceSimulation.WebService.Runtime;
@@ -158,6 +159,8 @@ private void StopAgents()
158159

159160
private Task MonitorThreadsAsync(IApplicationLifetime appLifetime)
160161
{
162+
const string MSG = "Part of the service is not running";
163+
161164
return Task.Run(() =>
162165
{
163166
while (!this.appStopToken.IsCancellationRequested)
@@ -173,12 +176,18 @@ private Task MonitorThreadsAsync(IApplicationLifetime appLifetime)
173176
|| this.partitioningAgentTask.Status == TaskStatus.RanToCompletion)
174177
{
175178
var log = this.ApplicationContainer.Resolve<ILogger>();
176-
log.Error("Part of the service is not running",
177-
() => new
178-
{
179-
SimulationAgent = this.simulationAgentTask.Status.ToString(),
180-
PartitioningAgent = this.partitioningAgentTask.Status.ToString()
181-
});
179+
var diagnosticsLogger = this.ApplicationContainer.Resolve<IDiagnosticsLogger>();
180+
181+
var errorData = new
182+
{
183+
SimulationAgent = this.simulationAgentTask.Status.ToString(),
184+
PartitioningAgent = this.partitioningAgentTask.Status.ToString()
185+
};
186+
187+
log.Error(MSG, () => errorData);
188+
189+
// Send diagnostics information
190+
diagnosticsLogger.LogServiceError(MSG, errorData);
182191

183192
// Allow few seconds to flush logs
184193
Thread.Sleep(5000);

0 commit comments

Comments
 (0)