@@ -12,13 +12,13 @@ namespace ThingConnect.Pulse.Server.Services.Monitoring;
1212/// </summary>
1313public sealed class OutageDetectionService : IOutageDetectionService
1414{
15- private readonly PulseDbContext _context ;
15+ private readonly IServiceProvider _serviceProvider ;
1616 private readonly ILogger < OutageDetectionService > _logger ;
1717 private readonly ConcurrentDictionary < Guid , MonitorState > _states = new ( ) ;
1818
19- public OutageDetectionService ( PulseDbContext context , ILogger < OutageDetectionService > logger )
19+ public OutageDetectionService ( IServiceProvider serviceProvider , ILogger < OutageDetectionService > logger )
2020 {
21- _context = context ;
21+ _serviceProvider = serviceProvider ;
2222 _logger = logger ;
2323 }
2424
@@ -37,10 +37,18 @@ public async Task<bool> ProcessCheckResultAsync(CheckResult result, Cancellation
3737 if ( result . Status == UpDown . up )
3838 {
3939 state . RecordSuccess ( ) ;
40+ _logger . LogDebug (
41+ "RecordSuccess called for endpoint {EndpointId}. SuccessStreak={SuccessStreak}, FailStreak={FailStreak}" ,
42+ result . EndpointId , state . SuccessStreak , state . FailStreak
43+ ) ;
4044 }
4145 else
4246 {
4347 state . RecordFailure ( ) ;
48+ _logger . LogDebug (
49+ "RecordFailure called for endpoint {EndpointId}. SuccessStreak={SuccessStreak}, FailStreak={FailStreak}, Error={Error}" ,
50+ result . EndpointId , state . SuccessStreak , state . FailStreak , result . Error
51+ ) ;
4452 }
4553
4654 // Check for DOWN transition
@@ -85,17 +93,20 @@ public async Task<bool> ProcessCheckResultAsync(CheckResult result, Cancellation
8593 /// <returns><placeholder>A <see cref="Task"/> representing the asynchronous operation.</placeholder></returns>
8694 public async Task InitializeStatesFromDatabaseAsync ( CancellationToken cancellationToken = default )
8795 {
96+ using IServiceScope scope = _serviceProvider . CreateScope ( ) ;
97+ PulseDbContext context = scope . ServiceProvider . GetRequiredService < PulseDbContext > ( ) ;
98+
8899 try
89100 {
90101 long now = UnixTimestamp . Now ( ) ;
91102
92103 // Check for monitoring gap using multiple signals for reliability
93- MonitoringSession ? lastSession = await _context . MonitoringSessions
104+ MonitoringSession ? lastSession = await context . MonitoringSessions
94105 . OrderByDescending ( s => s . StartedTs )
95106 . FirstOrDefaultAsync ( cancellationToken ) ;
96107
97108 // Get the most recent check result timestamp (most reliable signal)
98- long ? lastCheckTime = await _context . CheckResultsRaw
109+ long ? lastCheckTime = await context . CheckResultsRaw
99110 . Select ( c => c . Ts )
100111 . OrderByDescending ( ts => ts )
101112 . FirstOrDefaultAsync ( cancellationToken ) ;
@@ -113,7 +124,7 @@ public async Task InitializeStatesFromDatabaseAsync(CancellationToken cancellati
113124 lastSession ? . StartedTs ; // Last resort (potentially very old)
114125
115126 // Load endpoints once for both gap analysis and state initialization
116- List < Data . Endpoint > endpoints = await _context . Endpoints
127+ List < Data . Endpoint > endpoints = await context . Endpoints
117128 . Where ( e => e . Enabled )
118129 . ToListAsync ( cancellationToken ) ;
119130
@@ -130,7 +141,7 @@ public async Task InitializeStatesFromDatabaseAsync(CancellationToken cancellati
130141 gapDuration , UnixTimestamp . FromUnixSeconds ( lastMonitoringTime . Value ) ,
131142 endpointsWithGaps . Count , endpoints . Count ) ;
132143
133- await HandleMonitoringGapAsync ( lastMonitoringTime . Value , now , endpointsWithGaps , cancellationToken ) ;
144+ await HandleMonitoringGapAsync ( context , lastMonitoringTime . Value , now , endpointsWithGaps , cancellationToken ) ;
134145 }
135146 else
136147 {
@@ -146,10 +157,10 @@ public async Task InitializeStatesFromDatabaseAsync(CancellationToken cancellati
146157 Version = GetType ( ) . Assembly . GetName ( ) . Version ? . ToString ( )
147158 } ;
148159
149- _context . MonitoringSessions . Add ( newSession ) ;
150- await _context . SaveChangesAsync ( cancellationToken ) ;
160+ context . MonitoringSessions . Add ( newSession ) ;
161+ await context . SaveChangesAsync ( cancellationToken ) ;
151162
152- List < Outage > openOutages = await _context . Outages
163+ List < Outage > openOutages = await context . Outages
153164 . Where ( o => o . EndedTs == null )
154165 . ToListAsync ( cancellationToken ) ;
155166
@@ -166,7 +177,7 @@ public async Task InitializeStatesFromDatabaseAsync(CancellationToken cancellati
166177
167178 // Validate and fix inconsistent states
168179 ( UpDown ? correctedStatus , long ? correctedOutageId , bool wasInconsistent ) = await ValidateAndFixStateConsistencyAsync (
169- endpoint . Id , endpointStatus , hasOpenOutage ? outageId : null , endpoint . LastChangeTs , cancellationToken ) ;
180+ context , endpoint . Id , endpointStatus , hasOpenOutage ? outageId : null , endpoint . LastChangeTs , cancellationToken ) ;
170181
171182 if ( wasInconsistent )
172183 {
@@ -195,7 +206,7 @@ public async Task InitializeStatesFromDatabaseAsync(CancellationToken cancellati
195206 // Save any endpoint status corrections
196207 if ( inconsistenciesFixed > 0 )
197208 {
198- await _context . SaveChangesAsync ( cancellationToken ) ;
209+ await context . SaveChangesAsync ( cancellationToken ) ;
199210 }
200211
201212 if ( inconsistenciesFixed > 0 )
@@ -245,14 +256,14 @@ public async Task InitializeStatesFromDatabaseAsync(CancellationToken cancellati
245256 return Task . FromResult ( affectedEndpoints ) ;
246257 }
247258
248- private async Task HandleMonitoringGapAsync ( long lastMonitoringTime ,
259+ private async Task HandleMonitoringGapAsync ( PulseDbContext context , long lastMonitoringTime ,
249260 long now , List < Data . Endpoint > affectedEndpoints , CancellationToken cancellationToken )
250261 {
251262 // Get affected endpoint IDs for filtering
252263 var affectedEndpointIds = affectedEndpoints . Select ( e => e . Id ) . ToHashSet ( ) ;
253264
254265 // Handle open outages only for affected endpoints
255- List < Outage > outagesForAffectedEndpoints = await _context . Outages
266+ List < Outage > outagesForAffectedEndpoints = await context . Outages
256267 . Where ( o => o . EndedTs == null &&
257268 o . StartedTs < lastMonitoringTime &&
258269 affectedEndpointIds . Contains ( o . EndpointId ) )
@@ -285,7 +296,7 @@ private async Task HandleMonitoringGapAsync(long lastMonitoringTime,
285296 endpoint . Id , endpoint . Name ) ;
286297 }
287298
288- await _context . SaveChangesAsync ( cancellationToken ) ;
299+ await context . SaveChangesAsync ( cancellationToken ) ;
289300
290301 _logger . LogInformation ( "Handled monitoring gap: closed {OutageCount} outages and reset {EndpointCount} endpoint statuses" ,
291302 outagesForAffectedEndpoints . Count , affectedEndpoints . Count ) ;
@@ -297,12 +308,15 @@ private async Task HandleMonitoringGapAsync(long lastMonitoringTime,
297308 /// <returns><placeholder>A <see cref="Task"/> representing the asynchronous operation.</placeholder></returns>
298309 public async Task HandleGracefulShutdownAsync ( string ? shutdownReason = null , CancellationToken cancellationToken = default )
299310 {
311+ using IServiceScope scope = _serviceProvider . CreateScope ( ) ;
312+ PulseDbContext context = scope . ServiceProvider . GetRequiredService < PulseDbContext > ( ) ;
313+
300314 try
301315 {
302316 long now = UnixTimestamp . Now ( ) ;
303317
304318 // Close current monitoring session
305- MonitoringSession ? currentSession = await _context . MonitoringSessions
319+ MonitoringSession ? currentSession = await context . MonitoringSessions
306320 . Where ( s => s . EndedTs == null )
307321 . OrderByDescending ( s => s . StartedTs )
308322 . FirstOrDefaultAsync ( cancellationToken ) ;
@@ -316,7 +330,7 @@ public async Task HandleGracefulShutdownAsync(string? shutdownReason = null, Can
316330 }
317331
318332 // Mark all open outages with monitoring stop time (but don't close them yet)
319- List < Outage > openOutages = await _context . Outages
333+ List < Outage > openOutages = await context . Outages
320334 . Where ( o => o . EndedTs == null && o . MonitoringStoppedTs == null )
321335 . ToListAsync ( cancellationToken ) ;
322336
@@ -325,7 +339,7 @@ public async Task HandleGracefulShutdownAsync(string? shutdownReason = null, Can
325339 outage . MonitoringStoppedTs = now ;
326340 }
327341
328- await _context . SaveChangesAsync ( cancellationToken ) ;
342+ await context . SaveChangesAsync ( cancellationToken ) ;
329343
330344 _logger . LogInformation ( "Gracefully shut down monitoring, marked {Count} open outages" , openOutages . Count ) ;
331345 }
@@ -338,8 +352,11 @@ public async Task HandleGracefulShutdownAsync(string? shutdownReason = null, Can
338352 private async Task TransitionToDownAsync ( Guid endpointId , MonitorState state , long timestamp ,
339353 string ? error , CancellationToken cancellationToken )
340354 {
355+ using IServiceScope scope = _serviceProvider . CreateScope ( ) ;
356+ PulseDbContext context = scope . ServiceProvider . GetRequiredService < PulseDbContext > ( ) ;
357+
341358 // Use database transaction to ensure atomicity of all changes
342- using Microsoft . EntityFrameworkCore . Storage . IDbContextTransaction transaction = await _context . Database . BeginTransactionAsync ( cancellationToken ) ;
359+ using Microsoft . EntityFrameworkCore . Storage . IDbContextTransaction transaction = await context . Database . BeginTransactionAsync ( cancellationToken ) ;
343360
344361 try
345362 {
@@ -351,12 +368,12 @@ private async Task TransitionToDownAsync(Guid endpointId, MonitorState state, lo
351368 LastError = error
352369 } ;
353370
354- _context . Outages . Add ( outage ) ;
371+ context . Outages . Add ( outage ) ;
355372
356373 // Update endpoint's last status and change timestamp in same transaction
357- await UpdateEndpointStatusAsync ( endpointId , UpDown . down , timestamp , cancellationToken ) ;
358-
359- // Commit transaction - EF Core will save and commit all changes atomically
374+ await UpdateEndpointStatusAsync ( context , endpointId , UpDown . down , timestamp , cancellationToken ) ;
375+ await context . SaveChangesAsync ( cancellationToken ) ;
376+ // Commit transaction - EF Core will save and
360377 await transaction . CommitAsync ( cancellationToken ) ;
361378
362379 // Update in-memory state ONLY after successful database commit
@@ -375,16 +392,19 @@ private async Task TransitionToDownAsync(Guid endpointId, MonitorState state, lo
375392 private async Task TransitionToUpAsync ( Guid endpointId , MonitorState state , long timestamp ,
376393 CancellationToken cancellationToken )
377394 {
395+ using IServiceScope scope = _serviceProvider . CreateScope ( ) ;
396+ PulseDbContext context = scope . ServiceProvider . GetRequiredService < PulseDbContext > ( ) ;
397+
378398 // Use database transaction to ensure atomicity of all changes
379- using Microsoft . EntityFrameworkCore . Storage . IDbContextTransaction transaction = await _context . Database . BeginTransactionAsync ( cancellationToken ) ;
399+ using Microsoft . EntityFrameworkCore . Storage . IDbContextTransaction transaction = await context . Database . BeginTransactionAsync ( cancellationToken ) ;
380400
381401 try
382402 {
383403 // Close existing outage if any - query database for actual open outage
384404 long ? closedOutageId = null ;
385405 int ? outageDurationSeconds = null ;
386406
387- Outage ? openOutage = await _context . Outages
407+ Outage ? openOutage = await context . Outages
388408 . Where ( o => o . EndpointId == endpointId && o . EndedTs == null )
389409 . FirstOrDefaultAsync ( cancellationToken ) ;
390410
@@ -397,8 +417,8 @@ private async Task TransitionToUpAsync(Guid endpointId, MonitorState state, long
397417 }
398418
399419 // Update endpoint's last status and change timestamp in same transaction
400- await UpdateEndpointStatusAsync ( endpointId , UpDown . up , timestamp , cancellationToken ) ;
401-
420+ await UpdateEndpointStatusAsync ( context , endpointId , UpDown . up , timestamp , cancellationToken ) ;
421+ await context . SaveChangesAsync ( cancellationToken ) ;
402422 // Commit transaction - EF Core will save and commit all changes atomically
403423 await transaction . CommitAsync ( cancellationToken ) ;
404424
@@ -419,10 +439,10 @@ private async Task TransitionToUpAsync(Guid endpointId, MonitorState state, long
419439 }
420440 }
421441
422- private async Task UpdateEndpointStatusAsync ( Guid endpointId , UpDown status , long timestamp ,
442+ private async Task UpdateEndpointStatusAsync ( PulseDbContext context , Guid endpointId , UpDown status , long timestamp ,
423443 CancellationToken cancellationToken )
424444 {
425- Data . Endpoint ? endpoint = await _context . Endpoints . FindAsync ( [ endpointId ] , cancellationToken ) ;
445+ Data . Endpoint ? endpoint = await context . Endpoints . FindAsync ( [ endpointId ] , cancellationToken ) ;
426446 if ( endpoint != null )
427447 {
428448 endpoint . LastStatus = status ;
@@ -435,7 +455,7 @@ private async Task UpdateEndpointStatusAsync(Guid endpointId, UpDown status, lon
435455 /// Returns corrected status, outage ID, and whether inconsistency was found.
436456 /// </summary>
437457 private async Task < ( UpDown ? correctedStatus , long ? correctedOutageId , bool wasInconsistent ) > ValidateAndFixStateConsistencyAsync (
438- Guid endpointId , UpDown ? endpointStatus , long ? openOutageId , long ? endpointLastChangeTs , CancellationToken cancellationToken )
458+ PulseDbContext context , Guid endpointId , UpDown ? endpointStatus , long ? openOutageId , long ? endpointLastChangeTs , CancellationToken cancellationToken )
439459 {
440460 bool hasOpenOutage = openOutageId . HasValue ;
441461
@@ -454,7 +474,7 @@ private async Task UpdateEndpointStatusAsync(Guid endpointId, UpDown status, lon
454474 endpointId , openOutageId ) ;
455475
456476 // Close the outage - use endpoint's LastChangeTs if available, otherwise fall back to now
457- Outage ? outage = await _context . Outages . FindAsync ( [ openOutageId ! . Value ] , cancellationToken ) ;
477+ Outage ? outage = await context . Outages . FindAsync ( [ openOutageId ! . Value ] , cancellationToken ) ;
458478 if ( outage != null )
459479 {
460480 long outageEndTime = endpointLastChangeTs ?? UnixTimestamp . Now ( ) ;
@@ -485,8 +505,8 @@ private async Task UpdateEndpointStatusAsync(Guid endpointId, UpDown status, lon
485505 : "Auto-created: Endpoint status was DOWN during service restart"
486506 } ;
487507
488- _context . Outages . Add ( outage ) ;
489- await _context . SaveChangesAsync ( cancellationToken ) ; // Need ID for return
508+ context . Outages . Add ( outage ) ;
509+ await context . SaveChangesAsync ( cancellationToken ) ; // Need ID for return
490510
491511 return ( UpDown . down , outage . Id , true ) ;
492512 }
@@ -497,6 +517,9 @@ private async Task UpdateEndpointStatusAsync(Guid endpointId, UpDown status, lon
497517
498518 private async Task SaveCheckResultAsync ( CheckResult result , CancellationToken cancellationToken )
499519 {
520+ using IServiceScope scope = _serviceProvider . CreateScope ( ) ;
521+ PulseDbContext context = scope . ServiceProvider . GetRequiredService < PulseDbContext > ( ) ;
522+
500523 CheckResultRaw rawResult = new CheckResultRaw
501524 {
502525 EndpointId = result . EndpointId ,
@@ -506,7 +529,7 @@ private async Task SaveCheckResultAsync(CheckResult result, CancellationToken ca
506529 Error = result . Error
507530 } ;
508531
509- _context . CheckResultsRaw . Add ( rawResult ) ;
510- await _context . SaveChangesAsync ( cancellationToken ) ;
532+ context . CheckResultsRaw . Add ( rawResult ) ;
533+ await context . SaveChangesAsync ( cancellationToken ) ;
511534 }
512535}
0 commit comments