@@ -5,9 +5,11 @@ namespace ThingConnect.Pulse.Server.Services.Monitoring;
55/// <summary>
66/// Per-endpoint in-memory state for outage detection and flap damping.
77/// Tracks success/fail streaks and manages state transitions.
8+ /// Thread-safe with internal locking.
89/// </summary>
910public sealed class MonitorState
1011{
12+ private readonly object _lock = new object ( ) ;
1113 /// <summary>
1214 /// The last publicly reported status (UP/DOWN). Null if never determined.
1315 /// </summary>
@@ -40,12 +42,19 @@ public sealed class MonitorState
4042 /// </summary>
/// <param name="threshold">
/// Consecutive failures required to leave UP. Values below 1 are treated as 1.
/// </param>
/// <returns><c>true</c> when the caller should move the public status to DOWN.</returns>
public bool ShouldTransitionToDown(int threshold = 2)
{
    lock (_lock)
    {
        // Never initialized: there is no public status to damp yet, so the
        // first failure decides it. This check has to come before the
        // threshold comparison; otherwise a threshold above 1 would swallow
        // the first-failure case.
        if (LastPublicStatus == null)
        {
            return FailStreak >= 1;
        }

        // Only an endpoint that is currently UP can transition to DOWN.
        if (LastPublicStatus != UpDown.up)
        {
            return false;
        }

        // Clamp so a zero or negative threshold can never report a transition
        // without at least one recorded failure.
        return FailStreak >= Math.Max(1, threshold);
    }
}
5059
5160 /// <summary>
@@ -55,49 +64,96 @@ public bool ShouldTransitionToDown(int threshold = 2)
5564 /// </summary>
/// <param name="threshold">
/// Consecutive successes required to leave DOWN. Values below 1 are treated as 1.
/// </param>
/// <returns><c>true</c> when the caller should move the public status to UP.</returns>
public bool ShouldTransitionToUp(int threshold = 2)
{
    lock (_lock)
    {
        // Never initialized: there is no public status to damp yet, so the
        // first success decides it. This check has to come before the
        // threshold comparison; otherwise a threshold above 1 would swallow
        // the first-success case.
        if (LastPublicStatus == null)
        {
            return SuccessStreak >= 1;
        }

        // Only an endpoint that is currently DOWN can transition to UP.
        if (LastPublicStatus != UpDown.down)
        {
            return false;
        }

        // Clamp so a zero or negative threshold can never report a transition
        // without at least one recorded success.
        return SuccessStreak >= Math.Max(1, threshold);
    }
}
6581
/// <summary>
/// Registers one successful check: extends the success streak and clears the failure streak.
/// </summary>
public void RecordSuccess()
{
    // Both counters change under the same lock acquisition.
    lock (_lock)
    {
        SuccessStreak += 1;
        FailStreak = 0;
    }
}
7493
/// <summary>
/// Registers one failed check: extends the failure streak and clears the success streak.
/// </summary>
public void RecordFailure()
{
    // Both counters change under the same lock acquisition.
    lock (_lock)
    {
        FailStreak += 1;
        SuccessStreak = 0;
    }
}
83105
/// <summary>
/// Marks the public status as DOWN, stamps the change time and remembers the open outage.
/// </summary>
/// <param name="timestamp">Value stored in <c>LastChangeTs</c>.</param>
/// <param name="outageId">Value stored in <c>OpenOutageId</c>.</param>
public void TransitionToDown(long timestamp, long outageId)
{
    // All three fields are written inside one lock acquisition.
    lock (_lock)
    {
        LastPublicStatus = UpDown.down;
        LastChangeTs = timestamp;
        OpenOutageId = outageId;
    }
}
93118
/// <summary>
/// Marks the public status as UP, stamps the change time and clears the open outage id.
/// </summary>
/// <param name="timestamp">Value stored in <c>LastChangeTs</c>.</param>
public void TransitionToUp(long timestamp)
{
    // All three fields are written inside one lock acquisition.
    lock (_lock)
    {
        LastPublicStatus = UpDown.up;
        LastChangeTs = timestamp;
        OpenOutageId = null;
    }
}
131+
/// <summary>
/// Overwrites both streak counters with caller-supplied values
/// (used for rollback on transaction failures).
/// </summary>
/// <param name="successStreak">Value to store in <c>SuccessStreak</c>.</param>
/// <param name="failStreak">Value to store in <c>FailStreak</c>.</param>
public void RestoreStreakCounters(int successStreak, int failStreak)
{
    lock (_lock)
    {
        // Tuple assignment writes SuccessStreak first, then FailStreak.
        (SuccessStreak, FailStreak) = (successStreak, failStreak);
    }
}
143+
/// <summary>
/// Debugging aid: checks that the DOWN and UP transition tests do not both
/// report a pending transition for the current state.
/// </summary>
/// <param name="threshold">Streak length forwarded to both transition tests.</param>
/// <returns><c>false</c> only when both tests return <c>true</c> at once.</returns>
public bool ValidateTransitionMutualExclusivity(int threshold = 2)
{
    // Holding the lock here means both tests are evaluated with no other
    // lock holder running in between. The tests take the same lock again,
    // which the owning thread is allowed to re-enter.
    lock (_lock)
    {
        bool wantsDown = ShouldTransitionToDown(threshold);
        bool wantsUp = ShouldTransitionToUp(threshold);

        // Equivalent to !(wantsDown && wantsUp).
        return !wantsDown || !wantsUp;
    }
}
103159}
0 commit comments