Skip to content

Commit cfd8467

Browse files
authored
fix: resolve 10 critical monitoring service issues
2 parents 53eb08c + 4b5cb90 commit cfd8467

File tree

12 files changed

+1281
-191
lines changed

12 files changed

+1281
-191
lines changed

ThingConnect.Pulse.Server/Controllers/AuthController.cs

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using Microsoft.AspNetCore.Mvc;
44
using ThingConnect.Pulse.Server.Data;
55
using ThingConnect.Pulse.Server.Models;
6+
using ThingConnect.Pulse.Server.Services;
67

78
namespace ThingConnect.Pulse.Server.Controllers;
89

@@ -13,15 +14,18 @@ public sealed class AuthController : ControllerBase
1314
private readonly UserManager<ApplicationUser> _userManager;
1415
private readonly SignInManager<ApplicationUser> _signInManager;
1516
private readonly ILogger<AuthController> _logger;
17+
private readonly ISettingsService _settingsService;
1618

1719
/// <summary>
/// Creates the controller and stores its injected collaborators.
/// </summary>
/// <param name="userManager">Identity user manager kept in <c>_userManager</c>.</param>
/// <param name="signInManager">Identity sign-in manager kept in <c>_signInManager</c>.</param>
/// <param name="logger">Logger kept in <c>_logger</c>.</param>
/// <param name="settingsService">Settings store kept in <c>_settingsService</c>.</param>
/// <exception cref="ArgumentNullException">Thrown when any dependency is null.</exception>
public AuthController(
    UserManager<ApplicationUser> userManager,
    SignInManager<ApplicationUser> signInManager,
    ILogger<AuthController> logger,
    ISettingsService settingsService)
{
    // Fail fast at construction instead of with a NullReferenceException inside an action.
    _userManager = userManager ?? throw new ArgumentNullException(nameof(userManager));
    _signInManager = signInManager ?? throw new ArgumentNullException(nameof(signInManager));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _settingsService = settingsService ?? throw new ArgumentNullException(nameof(settingsService));
}
2630

2731
/// <summary>
@@ -118,7 +122,14 @@ public async Task<ActionResult<UserInfoDto>> RegisterAsync([FromBody] RegisterRe
118122
return BadRequest(new { message = "Registration failed", errors });
119123
}
120124

121-
_logger.LogInformation("Initial admin user created: {Username} (ID: {UserId})", user.UserName, user.Id);
125+
// Sign in the user immediately after successful registration
126+
await _signInManager.SignInAsync(user, isPersistent: true);
127+
128+
// Update last login time since we just signed them in
129+
user.LastLoginAt = DateTimeOffset.UtcNow;
130+
await _userManager.UpdateAsync(user);
131+
132+
_logger.LogInformation("Initial admin user created and signed in: {Username} (ID: {UserId})", user.UserName, user.Id);
122133

123134
return Ok(new UserInfoDto
124135
{
@@ -250,4 +261,53 @@ public async Task<IActionResult> LogoutAsync()
250261
return StatusCode(500, new { message = "Internal server error" });
251262
}
252263
}
264+
265+
/// <summary>
/// Saves the telemetry consent choices submitted during onboarding.
/// </summary>
/// <param name="request">Consent flags for error diagnostics and usage analytics.</param>
/// <returns>
/// 200 with a confirmation message on success, 400 when no request body was bound,
/// or 500 when persisting any of the settings throws.
/// </returns>
[HttpPost("telemetry-consent")]
public async Task<IActionResult> SaveTelemetryConsentAsync([FromBody] TelemetryConsentDto request)
{
    // Guard explicitly so a missing body never surfaces as a 500 from a NullReferenceException.
    if (request is null)
    {
        return BadRequest(new { message = "Request body is required" });
    }

    try
    {
        // Flags are stored as lowercase "true"/"false" strings.
        string errorDiagnostics = request.ErrorDiagnostics ? "true" : "false";
        string usageAnalytics = request.UsageAnalytics ? "true" : "false";

        // Store the timestamp as an explicit ISO 8601 round-trip string so the persisted
        // format does not depend on the current culture or on how the settings service
        // converts non-string values.
        string consentTimestamp = DateTimeOffset.UtcNow.ToString(
            "O", System.Globalization.CultureInfo.InvariantCulture);

        await _settingsService.SetAsync("telemetry_error_diagnostics", errorDiagnostics);
        await _settingsService.SetAsync("telemetry_usage_analytics", usageAnalytics);
        await _settingsService.SetAsync("telemetry_consent_timestamp", consentTimestamp);

        _logger.LogInformation("Telemetry consent saved: ErrorDiagnostics={ErrorDiagnostics}, UsageAnalytics={UsageAnalytics}",
            request.ErrorDiagnostics, request.UsageAnalytics);

        return Ok(new { message = "Telemetry consent saved successfully" });
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error saving telemetry consent");
        return StatusCode(500, new { message = "Internal server error" });
    }
}
289+
290+
/// <summary>
/// Returns the telemetry consent flags currently held by the settings service.
/// </summary>
/// <returns>
/// 200 with a <see cref="TelemetryConsentDto"/>, or 500 when reading the settings throws.
/// </returns>
[HttpGet("telemetry-consent")]
public async Task<ActionResult<TelemetryConsentDto>> GetTelemetryConsentAsync()
{
    // A flag counts as granted only when the stored text parses to true;
    // a missing or unparseable value is reported as false.
    static bool IsGranted(string? stored) => bool.TryParse(stored, out bool parsed) && parsed;

    try
    {
        string? storedErrorDiagnostics = await _settingsService.GetAsync("telemetry_error_diagnostics");
        string? storedUsageAnalytics = await _settingsService.GetAsync("telemetry_usage_analytics");

        var consent = new TelemetryConsentDto
        {
            ErrorDiagnostics = IsGranted(storedErrorDiagnostics),
            UsageAnalytics = IsGranted(storedUsageAnalytics)
        };

        return Ok(consent);
    }
    catch (Exception failure)
    {
        _logger.LogError(failure, "Error getting telemetry consent");
        return StatusCode(500, new { message = "Internal server error" });
    }
}
253313
}

ThingConnect.Pulse.Server/Models/AuthDtos.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,10 @@ public sealed class ChangePasswordDto
101101
[Required]
102102
[Compare("NewPassword")]
103103
public string ConfirmPassword { get; set; } = default!;
104+
}
105+
106+
/// <summary>
/// Request/response payload carrying the two telemetry consent flags.
/// </summary>
public sealed class TelemetryConsentDto
{
    /// <summary>Consent flag for error diagnostics telemetry.</summary>
    public bool ErrorDiagnostics { get; set; }

    /// <summary>Consent flag for usage analytics telemetry.</summary>
    public bool UsageAnalytics { get; set; }
}

ThingConnect.Pulse.Server/Services/Monitoring/MonitorState.cs

Lines changed: 76 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ namespace ThingConnect.Pulse.Server.Services.Monitoring;
55
/// <summary>
66
/// Per-endpoint in-memory state for outage detection and flap damping.
77
/// Tracks success/fail streaks and manages state transitions.
8+
/// Thread-safe with internal locking.
89
/// </summary>
910
public sealed class MonitorState
1011
{
12+
private readonly object _lock = new object();
1113
/// <summary>
1214
/// The last publicly reported status (UP/DOWN). Null if never determined.
1315
/// </summary>
@@ -40,12 +42,19 @@ public sealed class MonitorState
4042
/// </summary>
4143
public bool ShouldTransitionToDown(int threshold = 2)
{
    lock (_lock)
    {
        // Never initialized: the first failure is reported immediately, regardless of
        // threshold. This check must run before the threshold guard; otherwise the
        // default threshold of 2 would suppress the first-failure transition.
        if (LastPublicStatus == null)
        {
            return FailStreak >= 1;
        }

        // A non-positive threshold is clamped to 1 so at least one failure is required.
        if (FailStreak < Math.Max(1, threshold))
        {
            return false;
        }

        // Only an endpoint currently reported UP can transition to DOWN.
        return LastPublicStatus == UpDown.up;
    }
}
5059

5160
/// <summary>
@@ -55,49 +64,96 @@ public bool ShouldTransitionToDown(int threshold = 2)
5564
/// </summary>
5665
public bool ShouldTransitionToUp(int threshold = 2)
{
    lock (_lock)
    {
        // Never initialized: the first success is reported immediately, regardless of
        // threshold. This check must run before the threshold guard; otherwise the
        // default threshold of 2 would suppress the first-success transition.
        if (LastPublicStatus == null)
        {
            return SuccessStreak >= 1;
        }

        // A non-positive threshold is clamped to 1 so at least one success is required.
        if (SuccessStreak < Math.Max(1, threshold))
        {
            return false;
        }

        // Only an endpoint currently reported DOWN can transition to UP.
        return LastPublicStatus == UpDown.down;
    }
}
6581

6682
/// <summary>
/// Registers one successful check: extends the success streak by one
/// and clears the failure streak.
/// </summary>
public void RecordSuccess()
{
    lock (_lock)
    {
        SuccessStreak += 1;
        FailStreak = 0;
    }
}
7493

7594
/// <summary>
/// Registers one failed check: extends the failure streak by one
/// and clears the success streak.
/// </summary>
public void RecordFailure()
{
    lock (_lock)
    {
        FailStreak += 1;
        SuccessStreak = 0;
    }
}
83105

84106
/// <summary>
/// Marks the endpoint as DOWN, remembering when the change happened
/// and which outage is now open.
/// </summary>
/// <param name="timestamp">Value stored in <c>LastChangeTs</c>.</param>
/// <param name="outageId">Value stored in <c>OpenOutageId</c>.</param>
public void TransitionToDown(long timestamp, long outageId)
{
    lock (_lock)
    {
        // All three fields change together under the lock.
        LastPublicStatus = UpDown.down;
        LastChangeTs = timestamp;
        OpenOutageId = outageId;
    }
}
93118

94119
/// <summary>
/// Marks the endpoint as UP, remembering when the change happened
/// and clearing the open outage reference.
/// </summary>
/// <param name="timestamp">Value stored in <c>LastChangeTs</c>.</param>
public void TransitionToUp(long timestamp)
{
    lock (_lock)
    {
        // All three fields change together under the lock.
        LastPublicStatus = UpDown.up;
        LastChangeTs = timestamp;
        OpenOutageId = null;
    }
}
131+
132+
/// <summary>
/// Overwrites both streak counters with the supplied values
/// (used for rollback on transaction failures).
/// </summary>
/// <param name="successStreak">Value to place in <c>SuccessStreak</c>.</param>
/// <param name="failStreak">Value to place in <c>FailStreak</c>.</param>
public void RestoreStreakCounters(int successStreak, int failStreak)
{
    lock (_lock)
    {
        (SuccessStreak, FailStreak) = (successStreak, failStreak);
    }
}
143+
144+
/// <summary>
/// Checks that the two transition predicates do not both hold for the current state.
/// Intended for debugging and for verifying state machine correctness.
/// </summary>
/// <param name="threshold">Streak threshold forwarded to both predicates.</param>
/// <returns><c>true</c> when at most one transition is indicated; otherwise <c>false</c>.</returns>
public bool ValidateTransitionMutualExclusivity(int threshold = 2)
{
    // The predicates take the same lock again; C# monitors are re-entrant for the
    // owning thread, so holding it here keeps both evaluations on one consistent snapshot.
    lock (_lock)
    {
        bool wantsDown = ShouldTransitionToDown(threshold);
        bool wantsUp = ShouldTransitionToUp(threshold);

        // Valid unless both are set at once.
        return !wantsDown || !wantsUp;
    }
}
103159
}

0 commit comments

Comments
 (0)