Skip to content

Commit 220408e

Browse files
noahfalkmdh1418
andauthored
Change SocketException to ServerNotAvailable + misc test fixes/logging (#5354)
This started out as a bit of test logging. Then the unrelated GetProcessInfo test failed in CI so the change grew into a small product fix + more test improvements. Product changes: - Connecting to the IPC channel might fail when the runtime is starting up, shutting down, or is overloaded. If so the socket.Connect() call is going to throw a SocketException. In order to avoid having callers to DiagnosticClient APIs understand all the implrementation details and have to use catch clauses for a wide variety of different exceptions we now catch this exception and rethrow it wrapped as a ServerNotAvailableException. Test changes: - Add logging to the DuplicateMetricNames test to improve diagnostics when it hangs. - Add retries to the GetProcessInfo requests that are running while the target process is starting up. There is no guarantee the runtime is listening to the socket at this point and connection requests may be refused. This may resolve one cause of flakiness noted in #2760. The issue also shows a 'ReadPastEndOfStream' problem that is not addressed by this fix. - Add verification that the ExePath returned by DebuggeeCompiler actually exists on disk. This catches some build/config issues in the test setup more eagerly. - Add verification that the BuildProjectFramework and TargetConfiguration used by SdkPrebuiltDebuggeeCompiler are populated. This catches some specific test configuration errors more eagerly. - Tweaked some out-of-date comments --------- Co-authored-by: Mitchell Hwang <[email protected]>
1 parent 3d11b96 commit 220408e

File tree

7 files changed

+115
-54
lines changed

7 files changed

+115
-54
lines changed

eng/InstallRuntimes.proj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@
113113
</Target>
114114

115115
<!--
116-
Writes the Debugger.Tests.Versions.txt file used by the SOS test harness
116+
Writes the Debugger.Tests.Versions.txt file used by tests in this repo to determine which runtimes to test against
117117
-->
118118

119119
<Target Name="WriteTestVersionManifest"

src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/DiagnosticsClientExceptions.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ public UnsupportedProtocolException(string msg) : base(msg) { }
2121
public class ServerNotAvailableException : DiagnosticsClientException
2222
{
2323
public ServerNotAvailableException(string msg) : base(msg) { }
24+
public ServerNotAvailableException(string msg, Exception exception) : base(msg, exception) { }
2425
}
2526

2627
// When the runtime responded with an error

src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsIpc/IpcTransport.cs

Lines changed: 66 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.IO;
77
using System.IO.Pipes;
88
using System.Linq;
9+
using System.Net.Sockets;
910
using System.Runtime.InteropServices;
1011
using System.Security.Principal;
1112
using System.Threading;
@@ -20,6 +21,7 @@ internal abstract class IpcEndpoint
2021
/// </summary>
2122
/// <param name="timeout">The amount of time to block attempting to connect</param>
2223
/// <returns>A stream used for writing and reading data to and from the target .NET process</returns>
24+
/// <throws>ServerNotAvailableException</throws>
2325
public abstract Stream Connect(TimeSpan timeout);
2426

2527
/// <summary>
@@ -29,6 +31,7 @@ internal abstract class IpcEndpoint
2931
/// <returns>
3032
/// A task that completes with a stream used for writing and reading data to and from the target .NET process.
3133
/// </returns>
34+
/// <throws>ServerNotAvailableException</throws>
3235
public abstract Task<Stream> ConnectAsync(CancellationToken token);
3336

3437
/// <summary>
@@ -51,66 +54,81 @@ internal static class IpcEndpointHelper
5154
{
5255
public static Stream Connect(IpcEndpointConfig config, TimeSpan timeout)
5356
{
54-
if (config.Transport == IpcEndpointConfig.TransportType.NamedPipe)
55-
{
56-
NamedPipeClientStream namedPipe = new(
57-
".",
58-
config.Address,
59-
PipeDirection.InOut,
60-
PipeOptions.None,
61-
TokenImpersonationLevel.Impersonation);
62-
namedPipe.Connect((int)timeout.TotalMilliseconds);
63-
return namedPipe;
64-
}
65-
else if (config.Transport == IpcEndpointConfig.TransportType.UnixDomainSocket)
57+
try
6658
{
67-
IpcUnixDomainSocket socket = new();
68-
socket.Connect(new IpcUnixDomainSocketEndPoint(config.Address), timeout);
69-
return new ExposedSocketNetworkStream(socket, ownsSocket: true);
70-
}
59+
if (config.Transport == IpcEndpointConfig.TransportType.NamedPipe)
60+
{
61+
NamedPipeClientStream namedPipe = new(
62+
".",
63+
config.Address,
64+
PipeDirection.InOut,
65+
PipeOptions.None,
66+
TokenImpersonationLevel.Impersonation);
67+
namedPipe.Connect((int)timeout.TotalMilliseconds);
68+
return namedPipe;
69+
}
70+
else if (config.Transport == IpcEndpointConfig.TransportType.UnixDomainSocket)
71+
{
72+
IpcUnixDomainSocket socket = new();
73+
socket.Connect(new IpcUnixDomainSocketEndPoint(config.Address), timeout);
74+
return new ExposedSocketNetworkStream(socket, ownsSocket: true);
75+
}
7176
#if DIAGNOSTICS_RUNTIME
72-
else if (config.Transport == IpcEndpointConfig.TransportType.TcpSocket)
73-
{
74-
var tcpClient = new TcpClient ();
75-
var endPoint = new IpcTcpSocketEndPoint(config.Address);
76-
tcpClient.Connect(endPoint.EndPoint);
77-
return tcpClient.GetStream();
78-
}
77+
else if (config.Transport == IpcEndpointConfig.TransportType.TcpSocket)
78+
{
79+
var tcpClient = new TcpClient ();
80+
var endPoint = new IpcTcpSocketEndPoint(config.Address);
81+
tcpClient.Connect(endPoint.EndPoint);
82+
return tcpClient.GetStream();
83+
}
7984
#endif
80-
else
85+
else
86+
{
87+
throw new ArgumentException($"Unsupported IpcEndpointConfig transport type {config.Transport}");
88+
}
89+
90+
}
91+
catch (SocketException ex)
8192
{
82-
throw new ArgumentException($"Unsupported IpcEndpointConfig transport type {config.Transport}");
93+
throw new ServerNotAvailableException($"Unable to connect to the server. {ex.Message}", ex);
8394
}
8495
}
8596

8697
public static async Task<Stream> ConnectAsync(IpcEndpointConfig config, CancellationToken token)
8798
{
88-
if (config.Transport == IpcEndpointConfig.TransportType.NamedPipe)
89-
{
90-
NamedPipeClientStream namedPipe = new(
91-
".",
92-
config.Address,
93-
PipeDirection.InOut,
94-
PipeOptions.Asynchronous,
95-
TokenImpersonationLevel.Impersonation);
96-
97-
// Pass non-infinite timeout in order to cause internal connection algorithm
98-
// to check the CancellationToken periodically. Otherwise, if the named pipe
99-
// is waited using WaitNamedPipe with an infinite timeout, then the
100-
// CancellationToken cannot be observed.
101-
await namedPipe.ConnectAsync(int.MaxValue, token).ConfigureAwait(false);
102-
103-
return namedPipe;
104-
}
105-
else if (config.Transport == IpcEndpointConfig.TransportType.UnixDomainSocket)
99+
try
106100
{
107-
IpcUnixDomainSocket socket = new();
108-
await socket.ConnectAsync(new IpcUnixDomainSocketEndPoint(config.Address), token).ConfigureAwait(false);
109-
return new ExposedSocketNetworkStream(socket, ownsSocket: true);
101+
if (config.Transport == IpcEndpointConfig.TransportType.NamedPipe)
102+
{
103+
NamedPipeClientStream namedPipe = new(
104+
".",
105+
config.Address,
106+
PipeDirection.InOut,
107+
PipeOptions.Asynchronous,
108+
TokenImpersonationLevel.Impersonation);
109+
110+
// Pass non-infinite timeout in order to cause internal connection algorithm
111+
// to check the CancellationToken periodically. Otherwise, if the named pipe
112+
// is waited using WaitNamedPipe with an infinite timeout, then the
113+
// CancellationToken cannot be observed.
114+
await namedPipe.ConnectAsync(int.MaxValue, token).ConfigureAwait(false);
115+
116+
return namedPipe;
117+
}
118+
else if (config.Transport == IpcEndpointConfig.TransportType.UnixDomainSocket)
119+
{
120+
IpcUnixDomainSocket socket = new();
121+
await socket.ConnectAsync(new IpcUnixDomainSocketEndPoint(config.Address), token).ConfigureAwait(false);
122+
return new ExposedSocketNetworkStream(socket, ownsSocket: true);
123+
}
124+
else
125+
{
126+
throw new ArgumentException($"Unsupported IpcEndpointConfig transport type {config.Transport}");
127+
}
110128
}
111-
else
129+
catch (SocketException ex)
112130
{
113-
throw new ArgumentException($"Unsupported IpcEndpointConfig transport type {config.Transport}");
131+
throw new ServerNotAvailableException($"Unable to connect to the server. {ex.Message}", ex);
114132
}
115133
}
116134
}

src/Microsoft.Diagnostics.TestHelpers/SdkPrebuiltDebuggeeCompiler.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@ public class SdkPrebuiltDebuggeeCompiler : IDebuggeeCompiler
1515

1616
public SdkPrebuiltDebuggeeCompiler(TestConfiguration config, string debuggeeName)
1717
{
18+
if (string.IsNullOrEmpty(config.TargetConfiguration))
19+
{
20+
throw new System.ArgumentException("TargetConfiguration must be set in the TestConfiguration");
21+
}
22+
if (string.IsNullOrEmpty(config.BuildProjectFramework))
23+
{
24+
throw new System.ArgumentException("BuildProjectFramework must be set in the TestConfiguration");
25+
}
26+
1827
// The layout is how the current .NET Core SDK layouts the binaries out:
1928
// Source Path: <DebuggeeSourceRoot>/<DebuggeeName>/[<DebuggeeName>]
2029
// Binary Path: <DebuggeeBuildRoot>/bin/<DebuggeeName>/<TargetConfiguration>/<BuildProjectFramework>

src/tests/CommonTestRunner/TestRunner.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ public static async Task<TestRunner> Create(TestConfiguration config, ITestOutpu
4949

5050
// Get the full debuggee launch command line (includes the host if required)
5151
string exePath = debuggeeConfig.BinaryExePath;
52+
if (!File.Exists(exePath))
53+
{
54+
throw new FileNotFoundException($"Expected to find target executable at {exePath} but it didn't exist. Perhaps the path was improperly configured or a build/deployment error caused the file to be missing?");
55+
}
5256
string pipeName = null;
5357

5458
StringBuilder arguments = new();

src/tests/Microsoft.Diagnostics.Monitoring.EventPipe/EventCounterPipelineUnitTests.cs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,17 @@ private sealed class TestMetricsLogger : ICountersLogger
5959
private readonly List<ExpectedCounter> _expectedCounters = new();
6060
private Dictionary<ExpectedCounter, ICounterPayload> _metrics = new();
6161
private readonly TaskCompletionSource<object> _foundExpectedCountersSource;
62+
private readonly ITestOutputHelper _output;
6263

63-
public TestMetricsLogger(IEnumerable<ExpectedCounter> expectedCounters, TaskCompletionSource<object> foundExpectedCountersSource)
64+
public TestMetricsLogger(IEnumerable<ExpectedCounter> expectedCounters, TaskCompletionSource<object> foundExpectedCountersSource, ITestOutputHelper output)
6465
{
6566
_foundExpectedCountersSource = foundExpectedCountersSource;
6667
_expectedCounters = new(expectedCounters);
6768
if (_expectedCounters.Count == 0)
6869
{
6970
foundExpectedCountersSource.SetResult(null);
7071
}
72+
_output = output;
7173
}
7274

7375
public IEnumerable<ICounterPayload> Metrics => _metrics.Values;
@@ -90,17 +92,29 @@ public void Log(ICounterPayload payload)
9092
ExpectedCounter expectedCounter = _expectedCounters.Find(c => c.MatchesCounterMetadata(payload.CounterMetadata));
9193
if(expectedCounter != null)
9294
{
95+
9396
_expectedCounters.Remove(expectedCounter);
9497
_metrics.Add(expectedCounter, payload);
98+
99+
_output.WriteLine($"Found expected counter: {expectedCounter.ProviderName}/{expectedCounter.CounterName}. Counters remaining={_expectedCounters.Count}");
95100
// Complete the task source if the last expected key was removed.
96101
if (_expectedCounters.Count == 0)
97102
{
103+
_output.WriteLine($"All expected counters have been received. Signaling pipeline can exit.");
98104
_foundExpectedCountersSource.TrySetResult(null);
99105
}
100106
}
107+
else
108+
{
109+
_output.WriteLine($"Received additional counter event: {payload.CounterMetadata.ProviderName}/{payload.CounterMetadata.CounterName}");
110+
}
101111
}
102112

103-
public Task PipelineStarted(CancellationToken token) => Task.CompletedTask;
113+
public Task PipelineStarted(CancellationToken token)
114+
{
115+
_output.WriteLine("Counters pipeline is running. Waiting to receive expected counters from tracee.");
116+
return Task.CompletedTask;
117+
}
104118

105119
public Task PipelineStopped(CancellationToken token) => Task.CompletedTask;
106120
}
@@ -113,7 +127,7 @@ public async Task TestCounterEventPipeline(TestConfiguration config)
113127

114128
TaskCompletionSource<object> foundExpectedCountersSource = new(TaskCreationOptions.RunContinuationsAsynchronously);
115129

116-
TestMetricsLogger logger = new(expectedCounters.Select(name => new ExpectedCounter(expectedProvider, name)), foundExpectedCountersSource);
130+
TestMetricsLogger logger = new(expectedCounters.Select(name => new ExpectedCounter(expectedProvider, name)), foundExpectedCountersSource, _output);
117131

118132
await using (TestRunner testRunner = await PipelineTestUtilities.StartProcess(config, "CounterRemoteTest", _output))
119133
{
@@ -169,7 +183,7 @@ public async Task TestDuplicateNameMetrics(TestConfiguration config)
169183
new ExpectedCounter(providerName, counterName, "MeterTag=two","InstrumentTag=B"),
170184
];
171185
TaskCompletionSource<object> foundExpectedCountersSource = new(TaskCreationOptions.RunContinuationsAsynchronously);
172-
TestMetricsLogger logger = new(expectedCounters, foundExpectedCountersSource);
186+
TestMetricsLogger logger = new(expectedCounters, foundExpectedCountersSource, _output);
173187

174188
await using (TestRunner testRunner = await PipelineTestUtilities.StartProcess(config, "DuplicateNameMetrics", _output, testProcessTimeout: 3_000))
175189
{

src/tests/Microsoft.Diagnostics.NETCore.Client/GetProcessInfoTests.cs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,22 @@ private async Task BasicProcessInfoTestCore(TestConfiguration config, bool useAs
7474
ProcessInfo processInfoBeforeResume = null;
7575
if (suspend)
7676
{
77-
processInfoBeforeResume = await clientShim.GetProcessInfo();
77+
// when the process is just starting up, the IPC channel may not be ready yet. We need to be prepared for the connection attempt to fail.
78+
// If 100 retries over 10 seconds fail then we'll go ahead and fail the test.
79+
const int retryCount = 100;
80+
for (int i = 0; i < retryCount; i++)
81+
{
82+
try
83+
{
84+
processInfoBeforeResume = await clientShim.GetProcessInfo();
85+
break;
86+
}
87+
catch (ServerNotAvailableException) when (i < retryCount-1)
88+
{
89+
_output.WriteLine($"Failed to connect to the IPC channel as the process is starting up. Attempt {i+1} of {retryCount}. Waiting 0.1 seconds, then retrying.");
90+
await Task.Delay(100);
91+
}
92+
}
7893
ValidateProcessInfo(runner.Pid, processInfoBeforeResume);
7994
Assert.True((config.RuntimeFrameworkVersionMajor < 8) == string.IsNullOrEmpty(processInfoBeforeResume.ManagedEntrypointAssemblyName));
8095

0 commit comments

Comments
 (0)