Skip to content

Commit 22a99b5

Browse files
authored
Merge pull request #4445 from Particular/john/trackbutleaveone
Ensuring we leave at least one unhealthy instance is not automatically deleted for untracked endpoints
2 parents 1cd24b5 + 959ea4b commit 22a99b5

File tree

5 files changed

+22
-13
lines changed

5 files changed

+22
-13
lines changed

src/ServiceControl.Persistence/EndpointsView.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
namespace ServiceControl.Persistence
22
{
33
using System;
4-
using System.Text.Json.Serialization;
54

65
public class EndpointsView
76
{
@@ -12,6 +11,5 @@ public class EndpointsView
1211
public bool MonitorHeartbeat { get; set; }
1312
public HeartbeatInformation HeartbeatInformation { get; set; }
1413
public bool IsSendingHeartbeats { get; set; }
15-
[JsonIgnore] public bool IsNotSendingHeartbeats { get; set; }
1614
}
1715
}

src/ServiceControl.UnitTests/Monitoring/HeartbeatEndpointSettingsSyncHostedServiceTests.cs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,18 @@ public async Task
134134
const string endpointName1 = "Sales";
135135
Guid instanceA = DeterministicGuid.MakeId(endpointName1, "A");
136136
Guid instanceB = DeterministicGuid.MakeId(endpointName1, "B");
137+
Guid instanceC = DeterministicGuid.MakeId(endpointName1, "C");
137138
var mockMonitoringDataStore = new MockMonitoringDataStore(
138139
[new KnownEndpoint { EndpointDetails = new EndpointDetails { Name = endpointName1 } }]);
139140
var mockEndpointInstanceMonitoring = new MockEndpointInstanceMonitoring([
140-
new EndpointsView { IsNotSendingHeartbeats = true, Name = endpointName1, Id = instanceA },
141-
new EndpointsView { IsNotSendingHeartbeats = true, Name = endpointName1, Id = instanceB },
141+
new EndpointsView { IsSendingHeartbeats = false, Name = endpointName1, Id = instanceA },
142+
new EndpointsView { IsSendingHeartbeats = false, Name = endpointName1, Id = instanceB },
143+
new EndpointsView { IsSendingHeartbeats = false, Name = endpointName1, Id = instanceC },
142144
new EndpointsView
143145
{
144-
IsNotSendingHeartbeats = false,
146+
IsSendingHeartbeats = true,
145147
Name = endpointName1,
146-
Id = DeterministicGuid.MakeId(endpointName1, "C")
148+
Id = DeterministicGuid.MakeId(endpointName1, "D")
147149
}
148150
]);
149151
var service = new HeartbeatEndpointSettingsSyncHostedService(
@@ -177,10 +179,10 @@ public async Task
177179
var mockMonitoringDataStore = new MockMonitoringDataStore(
178180
[new KnownEndpoint { EndpointDetails = new EndpointDetails { Name = endpointName1 } }]);
179181
var mockEndpointInstanceMonitoring = new MockEndpointInstanceMonitoring([
180-
new EndpointsView { IsNotSendingHeartbeats = true, Name = endpointName1, Id = instanceA },
182+
new EndpointsView { IsSendingHeartbeats = false, Name = endpointName1, Id = instanceA },
181183
new EndpointsView
182184
{
183-
IsNotSendingHeartbeats = false,
185+
IsSendingHeartbeats = true,
184186
Name = endpointName1,
185187
Id = DeterministicGuid.MakeId(endpointName1, "B")
186188
}

src/ServiceControl/Monitoring/EndpointInstanceMonitoring.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ public EndpointsView[] GetEndpoints()
120120
{
121121
var view = endpoint.GetView();
122122
view.IsSendingHeartbeats = heartbeatLookup[endpoint.Id].Any(x => x.IsSendingHeartbeats());
123-
view.IsNotSendingHeartbeats = heartbeatLookup[endpoint.Id].Any(x => x.IsNotSendingHeartbeats());
124123
list.Add(view);
125124
}
126125

src/ServiceControl/Monitoring/HeartbeatEndpointSettingsSyncHostedService.cs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ protected override async Task ExecuteAsync(CancellationToken cancellationToken)
3535
{
3636
try
3737
{
38+
logger.LogInformation($"Performing sync for {nameof(HeartbeatEndpointSettingsSyncHostedService)}");
3839
await PerformSync(cancellationToken);
3940
}
4041
catch (Exception ex) when (ex is not OperationCanceledException)
@@ -62,8 +63,9 @@ async Task PerformSync(CancellationToken cancellationToken)
6263

6364
async Task PurgeMonitoringDataThatDoesNotNeedToBeTracked(CancellationToken cancellationToken)
6465
{
65-
ILookup<string, Guid> monitorEndpointsLookup = endpointInstanceMonitoring.GetEndpoints()
66-
.Where(view => view.IsNotSendingHeartbeats)
66+
EndpointsView[] endpointsViews = endpointInstanceMonitoring.GetEndpoints();
67+
ILookup<string, Guid> monitorEndpointsLookup = endpointsViews
68+
.Where(view => !view.IsSendingHeartbeats)
6769
.ToLookup(view => view.Name, view => view.Id);
6870
await foreach (EndpointSettings endpointSetting in endpointSettingsStore.GetAllEndpointSettings()
6971
.WithCancellation(cancellationToken))
@@ -72,10 +74,13 @@ async Task PurgeMonitoringDataThatDoesNotNeedToBeTracked(CancellationToken cance
7274
{
7375
if (monitorEndpointsLookup.Contains(endpointSetting.Name))
7476
{
75-
foreach (Guid endpointId in monitorEndpointsLookup[endpointSetting.Name])
77+
// We leave one dead instance behind, so that in ServicePulse we still display the endpoint as unhealthy, and is up to the user to manually delete it.
78+
// Otherwise, we would delete all dead instances and it could be that the endpoint should be alive but all instances are down and then we display nothing in ServicePulse which is no good!
79+
foreach (Guid endpointId in monitorEndpointsLookup[endpointSetting.Name].SkipLast(1))
7680
{
7781
endpointInstanceMonitoring.RemoveEndpoint(endpointId);
7882
await monitoringDataStore.Delete(endpointId);
83+
logger.LogInformation($"Removed endpoint '{endpointSetting.Name}' from monitoring data.");
7984
}
8085
}
8186
}
@@ -102,6 +107,8 @@ async Task InitialiseSettings(HashSet<string> monitorEndpoints, CancellationToke
102107
if (!monitorEndpoints.Contains(endpointSetting.Name))
103108
{
104109
await endpointSettingsStore.Delete(endpointSetting.Name, cancellationToken);
110+
logger.LogInformation(
111+
$"Removed EndpointTracking setting for '{endpointSetting.Name}' endpoint, since this endpoint is no longer monitored.");
105112
}
106113

107114
settingsNames.Add(endpointSetting.Name);
@@ -113,6 +120,8 @@ async Task InitialiseSettings(HashSet<string> monitorEndpoints, CancellationToke
113120
await endpointSettingsStore.UpdateEndpointSettings(
114121
new EndpointSettings { Name = string.Empty, TrackInstances = userSetTrackInstances },
115122
cancellationToken);
123+
logger.LogInformation(
124+
$"Initialized default value of EndpointTracking to {(userSetTrackInstances ? "tracking" : "not tracking")}.");
116125
}
117126

118127
// Initialise settings for any missing endpoint
@@ -121,6 +130,8 @@ await endpointSettingsStore.UpdateEndpointSettings(
121130
await endpointSettingsStore.UpdateEndpointSettings(
122131
new EndpointSettings { Name = name, TrackInstances = userSetTrackInstances },
123132
cancellationToken);
133+
logger.LogInformation(
134+
$"Initialized '{name}' value of EndpointTracking to {(userSetTrackInstances ? "tracking" : "not tracking")}.");
124135
}
125136
}
126137
}

src/ServiceControl/Monitoring/HeartbeatMonitor.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ public RecordedHeartbeat MarkDeadIfOlderThan(DateTime cutoff)
4747
}
4848

4949
public bool IsSendingHeartbeats() => heartbeat?.Status == HeartbeatStatus.Alive;
50-
public bool IsNotSendingHeartbeats() => heartbeat?.Status == HeartbeatStatus.Dead;
5150
volatile RecordedHeartbeat heartbeat = new RecordedHeartbeat(HeartbeatStatus.Unknown, null);
5251
}
5352
}

0 commit comments

Comments
 (0)