Skip to content

Commit 8796f00

Browse files
Copilot and dthaler authored
Add configurable hour offset for reboot checks to prevent staging/production conflicts (#395)
* Initial plan * Implement configurable hour offset for reboot checks to avoid staging/production conflicts Co-authored-by: dthaler <[email protected]> * Apply suggestions from code review * Add missing test case and remove manual test file as requested Co-authored-by: dthaler <[email protected]> --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: dthaler <[email protected]> Co-authored-by: Dave Thaler <[email protected]>
1 parent ee76153 commit 8796f00

File tree

3 files changed

+150
-3
lines changed

3 files changed

+150
-3
lines changed

OrcanodeMonitor/Models/Orcanode.cs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,24 @@ public bool NeedsRebootForContainerRestart
443443
}
444444
TimeSpan interval = PeriodicTasks.FrequencyToPoll;
445445
DateTime utcNow = DateTime.UtcNow;
446-
TimeSpan sinceTopOfHour = utcNow - utcNow.Date.AddHours(utcNow.Hour);
447-
if (sinceTopOfHour >= interval)
446+
447+
// Get the configured hour offset for reboot timing.
448+
string? hourOffsetString = Environment.GetEnvironmentVariable("ORCASOUND_REBOOT_HOUR_OFFSET_MINUTES");
449+
int hourOffsetMinutes = int.TryParse(hourOffsetString, out var offset) ? offset : 0;
450+
451+
// Calculate time since the configured offset within the hour.
452+
DateTime hourWithOffset = utcNow.Date.AddHours(utcNow.Hour).AddMinutes(hourOffsetMinutes);
453+
TimeSpan sinceOffsetTime = utcNow - hourWithOffset;
454+
455+
// Handle case where offset pushes us to next hour.
456+
if (sinceOffsetTime < TimeSpan.Zero)
448457
{
449-
// Only reboot within the first polling interval of the hour.
458+
sinceOffsetTime = sinceOffsetTime.Add(TimeSpan.FromHours(1));
459+
}
460+
461+
if (sinceOffsetTime >= interval)
462+
{
463+
// Only reboot within the first polling interval after the offset time.
450464
// This is so we only try a reboot at most once per hour.
451465
return false;
452466
}

Test/RebootOffsetTests.cs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
using Microsoft.VisualStudio.TestTools.UnitTesting;
2+
using System;
3+
using OrcanodeMonitor.Models;
4+
5+
namespace Test
{
    /// <summary>
    /// Tests for <c>Orcanode.NeedsRebootForContainerRestart</c> and its handling of the
    /// <c>ORCASOUND_REBOOT_HOUR_OFFSET_MINUTES</c> environment variable.
    /// </summary>
    /// <remarks>
    /// The property reads the wall clock internally, so its result for a node that is
    /// otherwise eligible for a reboot depends on when the test runs. Tests that cannot
    /// pin an expected value are therefore smoke tests: they pass as long as the getter
    /// does not throw.
    /// </remarks>
    [TestClass]
    public class RebootOffsetTests
    {
        /// <summary>Name of the environment variable that configures the reboot offset.</summary>
        private const string RebootOffsetVariable = "ORCASOUND_REBOOT_HOUR_OFFSET_MINUTES";

        /// <summary>
        /// Removes the offset variable after every test, including tests that fail or throw,
        /// so that one test's configuration can never leak into another.
        /// </summary>
        [TestCleanup]
        public void ClearRebootOffset()
        {
            Environment.SetEnvironmentVariable(RebootOffsetVariable, null);
        }

        /// <summary>
        /// Builds a node for the reboot checks.
        /// </summary>
        /// <param name="dataplicityOnline">Value assigned to <c>DataplicityOnline</c>.</param>
        /// <param name="streamOffline">
        /// When true, the recorded/manifest timestamps are set 10 minutes in the past, which
        /// these tests rely on to make the S3 stream count as offline; when false they are
        /// set to the current time so the stream counts as online.
        /// </param>
        /// <returns>A new node whose <c>LastCheckedUtc</c> is the current UTC time.</returns>
        private static Orcanode CreateNode(bool dataplicityOnline, bool streamOffline)
        {
            // Take a single clock snapshot so all three timestamps are mutually consistent.
            DateTime now = DateTime.UtcNow;
            DateTime lastData = streamOffline ? now.AddMinutes(-10) : now;

            return new Orcanode
            {
                DataplicityOnline = dataplicityOnline,
                LatestRecordedUtc = lastData,
                ManifestUpdatedUtc = lastData,
                LastCheckedUtc = now
            };
        }

        /// <summary>
        /// Smoke test: with no offset configured, evaluating the property must not throw.
        /// </summary>
        [TestMethod]
        public void TestDefaultRebootOffset()
        {
            // Clear environment variable to test default behavior.
            Environment.SetEnvironmentVariable(RebootOffsetVariable, null);
            var node = CreateNode(dataplicityOnline: true, streamOffline: true);

            // The result depends on the current time, so only the absence of an
            // exception is verified here.
            _ = node.NeedsRebootForContainerRestart;
        }

        /// <summary>
        /// Smoke test: with a 30-minute offset (the staging scenario), evaluating the
        /// property must not throw.
        /// </summary>
        [TestMethod]
        public void TestCustomRebootOffset()
        {
            Environment.SetEnvironmentVariable(RebootOffsetVariable, "30");
            var node = CreateNode(dataplicityOnline: true, streamOffline: true);

            // The result depends on the current time, so only the absence of an
            // exception is verified here.
            _ = node.NeedsRebootForContainerRestart;
        }

        /// <summary>
        /// A value that is not an integer must be handled gracefully and behave exactly
        /// like the default offset of 0.
        /// </summary>
        [TestMethod]
        public void TestInvalidRebootOffset()
        {
            var node = CreateNode(dataplicityOnline: true, streamOffline: true);

            // Sample the default behavior before and after the invalid-value evaluation.
            Environment.SetEnvironmentVariable(RebootOffsetVariable, null);
            bool defaultBefore = node.NeedsRebootForContainerRestart;

            Environment.SetEnvironmentVariable(RebootOffsetVariable, "invalid");
            bool withInvalidValue = node.NeedsRebootForContainerRestart;

            Environment.SetEnvironmentVariable(RebootOffsetVariable, null);
            bool defaultAfter = node.NeedsRebootForContainerRestart;

            // If the two default samples disagree, the clock crossed a reboot-window
            // boundary while the test was running and no comparison is meaningful.
            if (defaultBefore == defaultAfter)
            {
                Assert.AreEqual(defaultBefore, withInvalidValue, "An invalid offset should behave like the default offset of 0");
            }
        }

        /// <summary>
        /// Verifies the preconditions for a reboot: none is requested when Dataplicity is
        /// offline or when the S3 stream is online, regardless of timing.
        /// </summary>
        [TestMethod]
        public void TestRebootRequirements()
        {
            // Do not let an ambient offset value influence this test.
            Environment.SetEnvironmentVariable(RebootOffsetVariable, null);

            // Test that reboot is not needed when Dataplicity is offline.
            var nodeOffline = CreateNode(dataplicityOnline: false, streamOffline: true);
            Assert.IsFalse(nodeOffline.NeedsRebootForContainerRestart, "Should not need reboot when Dataplicity is offline");

            // Test that reboot is not needed when S3 stream is online (recent data).
            var nodeStreamOnline = CreateNode(dataplicityOnline: true, streamOffline: false);
            Assert.IsFalse(nodeStreamOnline.NeedsRebootForContainerRestart, "Should not need reboot when S3 stream is online");

            // When Dataplicity is online but the S3 stream is offline, a reboot is needed
            // only inside the timing window, so the value cannot be pinned here; just
            // verify that the getter does not throw.
            var nodeNeedsReboot = CreateNode(dataplicityOnline: true, streamOffline: true);
            _ = nodeNeedsReboot.NeedsRebootForContainerRestart;
        }
    }
}

docs/Design.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ The following state will be stored per orcanode:
111111

112112
**ORCASOUND_POLL_FREQUENCY_IN_MINUTES**: Service will poll each orcanode at the configured frequency. Default: 5
113113

114+
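**ORCASOUND_REBOOT_HOUR_OFFSET_MINUTES**: The number of minutes after the top of each hour at which the reboot-check window opens; a reboot is only attempted within the first polling interval after that point. Giving each deployment (e.g., production and staging) a different offset keeps them from attempting reboots at the same time. Values are expected to be between 0 and 59; a value that cannot be parsed as an integer is treated as 0. Default: 0 (top of the hour)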
115+
114116
**ORCASOUND_MAX_UPLOAD_DELAY_MINUTES**: If the manifest file is older than this, the node will be considered offline. Default: 2
115117

116118
**MEZMO_LOG_SECONDS**: The number of seconds of Mezmo logs to check for activity. Default: 60

0 commit comments

Comments
 (0)