Skip to content

Commit b167ea5

Browse files
authored
Enhance orphan detection in Aspire AppHost to be robust against PID reuse (#10673)
1 parent 9a175dd commit b167ea5

File tree

7 files changed

+189
-4
lines changed

7 files changed

+189
-4
lines changed

src/Aspire.Cli/DotNet/DotNetCliRunner.cs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.Text;
99
using System.Text.Json;
1010
using Aspire.Cli.Backchannel;
11+
using Aspire.Cli.Configuration;
1112
using Aspire.Cli.Resources;
1213
using Aspire.Cli.Telemetry;
1314
using Aspire.Hosting;
@@ -41,11 +42,17 @@ internal sealed class DotNetCliRunnerInvocationOptions
4142
public bool NoLaunchProfile { get; set; }
4243
}
4344

44-
internal class DotNetCliRunner(ILogger<DotNetCliRunner> logger, IServiceProvider serviceProvider, AspireCliTelemetry telemetry, IConfiguration configuration) : IDotNetCliRunner
45+
internal class DotNetCliRunner(ILogger<DotNetCliRunner> logger, IServiceProvider serviceProvider, AspireCliTelemetry telemetry, IConfiguration configuration, IFeatures features) : IDotNetCliRunner
4546
{
4647

4748
// Returns the id of the current (CLI) process via Environment.ProcessId.
// ExecuteAsync passes this value to launched processes through the
// KnownConfigNames.CliProcessId environment variable for orphan detection.
// NOTE(review): the delegate is settable, presumably so tests can substitute a fixed PID - confirm.
internal Func<int> GetCurrentProcessId { get; set; } = () => Environment.ProcessId;
4849

50+
/// <summary>
/// Returns the start time of the current (CLI) process as Unix time in whole seconds.
/// ExecuteAsync passes this value to launched processes through the
/// <c>KnownConfigNames.CliProcessStarted</c> environment variable so that the
/// orphan detector can tell the original CLI process apart from a process that reuses its PID.
/// </summary>
internal Func<long> GetCurrentProcessStartTime { get; set; } = () =>
{
    // Process wraps operating-system resources; dispose it as soon as the start time is read.
    using var currentProcess = Process.GetCurrentProcess();
    return ((DateTimeOffset)currentProcess.StartTime).ToUnixTimeSeconds();
};
55+
4956
private string GetMsBuildServerValue()
5057
{
5158
return configuration["DOTNET_CLI_USE_MSBUILD_SERVER"] ?? "true";
@@ -437,6 +444,13 @@ public virtual async Task<int> ExecuteAsync(string[] args, IDictionary<string, s
437444
// not exist the orphan detector will exit.
438445
startInfo.EnvironmentVariables[KnownConfigNames.CliProcessId] = GetCurrentProcessId().ToString(CultureInfo.InvariantCulture);
439446

447+
// Set the CLI process start time for robust orphan detection to prevent PID reuse issues.
448+
// The AppHost will verify both PID and start time to ensure it's monitoring the correct process.
449+
if (features.IsFeatureEnabled(KnownFeatures.OrphanDetectionWithTimestampEnabled, true))
450+
{
451+
startInfo.EnvironmentVariables[KnownConfigNames.CliProcessStarted] = GetCurrentProcessStartTime().ToString(CultureInfo.InvariantCulture);
452+
}
453+
440454
// Always set MSBUILDTERMINALLOGGER=false for all dotnet command executions to ensure consistent terminal logger behavior
441455
startInfo.EnvironmentVariables[KnownConfigNames.MsBuildTerminalLogger] = "false";
442456

src/Aspire.Cli/KnownFeatures.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ internal static class KnownFeatures
1010
public static string UpdateNotificationsEnabled => "updateNotificationsEnabled";
1111
public static string MinimumSdkCheckEnabled => "minimumSdkCheckEnabled";
1212
public static string ExecCommandEnabled => "execCommandEnabled";
13+
// Feature name checked by DotNetCliRunner (with a default of enabled) to decide whether the
// CLI process start time is passed to launched processes via KnownConfigNames.CliProcessStarted.
public static string OrphanDetectionWithTimestampEnabled => "orphanDetectionWithTimestampEnabled";
1314
}

src/Aspire.Hosting/Cli/CliOrphanDetector.cs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,28 @@ internal sealed class CliOrphanDetector(IConfiguration configuration, IHostAppli
2222
}
2323
};
2424

25+
/// <summary>
/// Determines whether the process identified by <c>pid</c> is still running AND is the
/// same process that was originally launched, by comparing its start time (Unix time,
/// whole seconds) with <c>expectedStartTimeUnix</c>. A matching PID with a different
/// start time is treated as "not running", which guards against PID reuse.
/// </summary>
/// <remarks>
/// Returns <c>false</c> when the process has exited, when its start time differs from
/// the expected value, or when the process or its start time cannot be read at all.
/// </remarks>
internal Func<int, long, bool> IsProcessRunningWithStartTime { get; set; } = (int pid, long expectedStartTimeUnix) =>
{
    try
    {
        // ExecuteAsync polls this delegate on a one-second timer, so the Process wrapper
        // (and the OS handle behind it) must be released on every call rather than left
        // for the finalizer.
        using var process = Process.GetProcessById(pid);
        if (process.HasExited)
        {
            return false;
        }

        // The start time must match exactly; both sides are truncated to whole seconds.
        var actualStartTimeUnix = ((DateTimeOffset)process.StartTime).ToUnixTimeSeconds();
        return actualStartTimeUnix == expectedStartTimeUnix;
    }
    catch
    {
        // If we can't get the process and/or can't get the start time,
        // then we interpret both exceptions as the process not being there.
        return false;
    }
};
46+
2547
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
2648
{
2749
try
@@ -33,11 +55,32 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
3355
return;
3456
}
3557

58+
// Try to get the CLI process start time for robust orphan detection
59+
long? expectedStartTimeUnix = null;
60+
if (configuration[KnownConfigNames.CliProcessStarted] is { } startTimeString &&
61+
long.TryParse(startTimeString, out var startTimeUnix))
62+
{
63+
expectedStartTimeUnix = startTimeUnix;
64+
}
65+
3666
using var periodic = new PeriodicTimer(TimeSpan.FromSeconds(1), timeProvider);
3767

3868
do
3969
{
40-
if (!IsProcessRunning(pid))
70+
bool isProcessStillRunning;
71+
72+
if (expectedStartTimeUnix.HasValue)
73+
{
74+
// Use robust process checking with start time verification
75+
isProcessStillRunning = IsProcessRunningWithStartTime(pid, expectedStartTimeUnix.Value);
76+
}
77+
else
78+
{
79+
// Fall back to PID-only logic for backwards compatibility
80+
isProcessStillRunning = IsProcessRunning(pid);
81+
}
82+
83+
if (!isProcessStillRunning)
4184
{
4285
lifetime.StopApplication();
4386
return;

src/Shared/KnownConfigNames.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ internal static class KnownConfigNames
2727
public const string WaitForDebuggerTimeout = "ASPIRE_DEBUGGER_TIMEOUT";
2828
public const string UnixSocketPath = "ASPIRE_BACKCHANNEL_PATH";
2929
public const string CliProcessId = "ASPIRE_CLI_PID";
30+
// Name of the environment variable / configuration key that carries the CLI process start time
// (Unix time in seconds). DotNetCliRunner sets it next to CliProcessId, and CliOrphanDetector
// reads it to verify that the monitored PID still belongs to the original CLI process.
public const string CliProcessStarted = "ASPIRE_CLI_STARTED";
3031
public const string ForceRichConsole = "ASPIRE_FORCE_RICH_CONSOLE";
3132
public const string TestingDisableHttpClient = "ASPIRE_TESTING_DISABLE_HTTP_CLIENT";
3233

tests/Aspire.Cli.Tests/DotNet/DotNetCliRunnerTests.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using Aspire.Cli.Backchannel;
5+
using Aspire.Cli.Configuration;
56
using Aspire.Cli.DotNet;
67
using Aspire.Cli.Telemetry;
78
using Aspire.Cli.Tests.Utils;
@@ -34,6 +35,7 @@ public async Task DotNetCliCorrectlyAppliesNoLaunchProfileArgumentWhenSpecifiedI
3435
provider,
3536
new AspireCliTelemetry(),
3637
provider.GetRequiredService<IConfiguration>(),
38+
provider.GetRequiredService<IFeatures>(),
3739
(args, _, _, _, _) => Assert.Contains(args, arg => arg == "--no-launch-profile"),
3840
42
3941
);
@@ -74,6 +76,7 @@ public async Task BuildAsyncAlwaysInjectsDotnetCliUseMsBuildServerEnvironmentVar
7476
provider,
7577
new AspireCliTelemetry(),
7678
provider.GetRequiredService<IConfiguration>(),
79+
provider.GetRequiredService<IFeatures>(),
7780
(args, env, _, _, _) =>
7881
{
7982
Assert.NotNull(env);
@@ -116,6 +119,7 @@ public async Task BuildAsyncUsesConfigurationValueForDotnetCliUseMsBuildServer()
116119
provider,
117120
new AspireCliTelemetry(),
118121
provider.GetRequiredService<IConfiguration>(),
122+
provider.GetRequiredService<IFeatures>(),
119123
(args, env, _, _, _) =>
120124
{
121125
Assert.NotNull(env);
@@ -148,6 +152,7 @@ public async Task RunAsyncInjectsDotnetCliUseMsBuildServerWhenNoBuildIsFalse()
148152
provider,
149153
new AspireCliTelemetry(),
150154
provider.GetRequiredService<IConfiguration>(),
155+
provider.GetRequiredService<IFeatures>(),
151156
(args, env, _, _, _) =>
152157
{
153158
Assert.NotNull(env);
@@ -189,6 +194,7 @@ public async Task RunAsyncDoesNotInjectDotnetCliUseMsBuildServerWhenNoBuildIsTru
189194
provider,
190195
new AspireCliTelemetry(),
191196
provider.GetRequiredService<IConfiguration>(),
197+
provider.GetRequiredService<IFeatures>(),
192198
(args, env, _, _, _) =>
193199
{
194200
// When noBuild is true, the original env should be passed through unchanged
@@ -233,6 +239,7 @@ public async Task RunAsyncPreservesExistingEnvironmentVariables()
233239
provider,
234240
new AspireCliTelemetry(),
235241
provider.GetRequiredService<IConfiguration>(),
242+
provider.GetRequiredService<IFeatures>(),
236243
(args, env, _, _, _) =>
237244
{
238245
Assert.NotNull(env);
@@ -279,6 +286,7 @@ public async Task NewProjectAsyncReturnsExitCode73WhenProjectAlreadyExists()
279286
provider,
280287
new AspireCliTelemetry(),
281288
provider.GetRequiredService<IConfiguration>(),
289+
provider.GetRequiredService<IFeatures>(),
282290
(args, env, _, _, _) =>
283291
{
284292
// Verify the arguments are correct for dotnet new
@@ -305,9 +313,10 @@ internal sealed class AssertingDotNetCliRunner(
305313
IServiceProvider serviceProvider,
306314
AspireCliTelemetry telemetry,
307315
IConfiguration configuration,
316+
IFeatures features,
308317
Action<string[], IDictionary<string, string>?, DirectoryInfo, TaskCompletionSource<IAppHostBackchannel>?, DotNetCliRunnerInvocationOptions> assertionCallback,
309318
int exitCode
310-
) : DotNetCliRunner(logger, serviceProvider, telemetry, configuration)
319+
) : DotNetCliRunner(logger, serviceProvider, telemetry, configuration, features)
311320
{
312321
public override Task<int> ExecuteAsync(string[] args, IDictionary<string, string>? env, DirectoryInfo workingDirectory, TaskCompletionSource<IAppHostBackchannel>? backchannelCompletionSource, DotNetCliRunnerInvocationOptions options, CancellationToken cancellationToken)
313322
{

tests/Aspire.Cli.Tests/Hosting/CliOrphanDetectorTests.cs

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,122 @@ public async Task CliOrphanDetectorCallsStopIfEnvironmentVariablePresentAndProce
4848
Assert.True(await stopSignalChannel.Reader.WaitToReadAsync());
4949
}
5050

51+
[Fact]
public async Task CliOrphanDetectorUsesTimestampDetectionWhenStartTimeProvided()
{
    // Arrange: a valid PID and a valid start time, formatted culture-invariantly because
    // the value is machine-readable configuration rather than user-facing text.
    var expectedStartTimeUnixSeconds = DateTimeOffset.UtcNow.AddMinutes(-5).ToUnixTimeSeconds();
    var configuration = new ConfigurationBuilder()
        .AddInMemoryCollection(new Dictionary<string, string?>
        {
            { "ASPIRE_CLI_PID", "1111" },
            { "ASPIRE_CLI_STARTED", expectedStartTimeUnixSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture) }
        })
        .Build();

    var stopSignalChannel = Channel.CreateUnbounded<bool>();
    var lifetime = new HostLifetimeStub(() => stopSignalChannel.Writer.TryWrite(true));

    var detector = new CliOrphanDetector(configuration, lifetime, TimeProvider.System);

    // Record what the timestamp-aware check receives so the test proves that this path
    // (and not the PID-only fallback) was taken, with the values from configuration.
    int? observedPid = null;
    long? observedStartTime = null;
    detector.IsProcessRunningWithStartTime = (pid, startTime) =>
    {
        observedPid = pid;
        observedStartTime = startTime;
        return false;
    };

    // Act
    await detector.StartAsync(CancellationToken.None).WaitAsync(TimeSpan.FromSeconds(5));

    // Assert: the host was asked to stop, and the decision came from the timestamp check.
    Assert.True(await stopSignalChannel.Reader.WaitToReadAsync());
    Assert.Equal(1111, observedPid);
    Assert.Equal(expectedStartTimeUnixSeconds, observedStartTime);
}
73+
74+
[Fact]
public async Task CliOrphanDetectorFallsBackToPidOnlyWhenStartTimeInvalid()
{
    // The start time cannot be parsed as a number, so the detector is expected to
    // ignore it and rely on the PID-only check instead.
    var settings = new Dictionary<string, string?>
    {
        ["ASPIRE_CLI_PID"] = "1111",
        ["ASPIRE_CLI_STARTED"] = "invalid_start_time"
    };
    var config = new ConfigurationBuilder().AddInMemoryCollection(settings).Build();

    var stopRequests = Channel.CreateUnbounded<bool>();
    var hostLifetime = new HostLifetimeStub(() => stopRequests.Writer.TryWrite(true));

    // The PID-only check reports the process as gone.
    var orphanDetector = new CliOrphanDetector(config, hostLifetime, TimeProvider.System)
    {
        IsProcessRunning = _ => false
    };

    var startTask = orphanDetector.StartAsync(CancellationToken.None);
    await startTask.WaitAsync(TimeSpan.FromSeconds(5));

    var stopRequested = await stopRequests.Reader.WaitToReadAsync();
    Assert.True(stopRequested);
}
94+
95+
[Fact]
public async Task CliOrphanDetectorContinuesRunningWhenProcessAliveWithCorrectStartTime()
{
    // Arrange: valid PID and start time (culture-invariant, since it is machine-readable).
    var expectedStartTimeUnix = DateTimeOffset.UtcNow.AddMinutes(-5).ToUnixTimeSeconds();
    var configuration = new ConfigurationBuilder()
        .AddInMemoryCollection(new Dictionary<string, string?>
        {
            { "ASPIRE_CLI_PID", "1111" },
            { "ASPIRE_CLI_STARTED", expectedStartTimeUnix.ToString(System.Globalization.CultureInfo.InvariantCulture) }
        })
        .Build();
    var fakeTimeProvider = new FakeTimeProvider(DateTimeOffset.Now);

    var stopSignalChannel = Channel.CreateUnbounded<bool>();
    var processRunningChannel = Channel.CreateUnbounded<int>();

    var lifetime = new HostLifetimeStub(() => stopSignalChannel.Writer.TryWrite(true));
    var detector = new CliOrphanDetector(configuration, lifetime, fakeTimeProvider);

    var processRunningCallCounter = 0;
    detector.IsProcessRunningWithStartTime = (pid, startTime) =>
    {
        Assert.True(processRunningChannel.Writer.TryWrite(++processRunningCallCounter));
        return processRunningCallCounter < 3; // The third check reports the process as gone.
    };

    await detector.StartAsync(CancellationToken.None).WaitAsync(TimeSpan.FromSeconds(5));

    // Each notification is consumed (not merely observed) before time is advanced.
    // Otherwise the unread first item would satisfy every later wait immediately and
    // the clock could be advanced before the detector had performed its next check.

    // First check: process is alive.
    Assert.Equal(1, await processRunningChannel.Reader.ReadAsync());
    fakeTimeProvider.Advance(TimeSpan.FromSeconds(1));

    // Second check: process is still alive.
    Assert.Equal(2, await processRunningChannel.Reader.ReadAsync());
    fakeTimeProvider.Advance(TimeSpan.FromSeconds(1));

    // Third check: process is reported as gone.
    Assert.Equal(3, await processRunningChannel.Reader.ReadAsync());
    Assert.Equal(3, processRunningCallCounter);

    // Should stop the application
    Assert.True(await stopSignalChannel.Reader.WaitToReadAsync());
}
138+
139+
[Fact]
public async Task CliOrphanDetectorStopsWhenProcessHasDifferentStartTime()
{
    // Arrange: the CLI advertised this start time (culture-invariant, machine-readable value).
    var expectedStartTimeUnix = DateTimeOffset.UtcNow.AddMinutes(-5).ToUnixTimeSeconds();
    var expectedStartTimeUnixString = expectedStartTimeUnix.ToString(System.Globalization.CultureInfo.InvariantCulture);
    var configuration = new ConfigurationBuilder()
        .AddInMemoryCollection(new Dictionary<string, string?>
        {
            { "ASPIRE_CLI_PID", "1111" },
            { "ASPIRE_CLI_STARTED", expectedStartTimeUnixString }
        })
        .Build();

    var stopSignalChannel = Channel.CreateUnbounded<bool>();
    var lifetime = new HostLifetimeStub(() => stopSignalChannel.Writer.TryWrite(true));

    var detector = new CliOrphanDetector(configuration, lifetime, TimeProvider.System);

    // Simulate PID reuse: a process with the same PID exists, but it started a minute
    // after the original CLI process, so the start times do not match.
    var reusedPidStartTimeUnix = expectedStartTimeUnix + 60;
    detector.IsProcessRunningWithStartTime = (pid, startTime) => startTime == reusedPidStartTimeUnix;

    // Act
    await detector.StartAsync(CancellationToken.None).WaitAsync(TimeSpan.FromSeconds(5));

    // Assert: the mismatch is treated as "CLI is gone" and the host is stopped.
    Assert.True(await stopSignalChannel.Reader.WaitToReadAsync());
}
166+
51167
[Fact]
52168
public async Task CliOrphanDetectorAfterTheProcessWasRunningForAWhileThenStops()
53169
{

tests/Aspire.Cli.Tests/Utils/CliTestHelper.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,8 @@ public IProjectLocator CreateDefaultProjectLocatorFactory(IServiceProvider servi
203203
var logger = serviceProvider.GetRequiredService<ILogger<DotNetCliRunner>>();
204204
var telemetry = serviceProvider.GetRequiredService<AspireCliTelemetry>();
205205
var configuration = serviceProvider.GetRequiredService<IConfiguration>();
206-
return new DotNetCliRunner(logger, serviceProvider, telemetry, configuration);
206+
var features = serviceProvider.GetRequiredService<IFeatures>();
207+
return new DotNetCliRunner(logger, serviceProvider, telemetry, configuration, features);
207208
};
208209

209210
public Func<IServiceProvider, IDotNetSdkInstaller> DotNetSdkInstallerFactory { get; set; } = (IServiceProvider serviceProvider) =>

0 commit comments

Comments
 (0)