Skip to content

Commit 1b9d131

Browse files
andrewlock authored and chojomok committed
Exclude common failure in smoke tests (#6469)
## Summary of changes

Retries a common error flake that we can't get to the bottom of.

## Reason for change

The runtime _sometimes_ crashes after the app completes and we've shut down, causing flaky test runs. We haven't managed to get to the bottom of it yet.

## Implementation details

Retry the smoke tests _once_ if we spot an error that looks like this:

```
ptrace(ATTACH, 14822) FAILED Operation not permitted
```

## Test coverage

Hard to test - as long as it passes for now, that's good enough I think.
1 parent 1913706 commit 1b9d131

File tree

1 file changed

+31
-2
lines changed
  • tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/SmokeTests

1 file changed

+31
-2
lines changed

tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/SmokeTests/SmokeTestBase.cs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,24 @@ protected void SetEnvironmentVariable(string key, string value)
5454
/// <param name="expectedExitCode">Expected exit code</param>
5555
/// <returns>Async operation</returns>
5656
protected async Task CheckForSmoke(bool shouldDeserializeTraces = true, int expectedExitCode = 0)
{
    // RunCheck returns false only when it was allowed to retry and detected the
    // known "ptrace(ATTACH, <pid>) FAILED Operation not permitted" shutdown error.
    // Allow exactly one retry for that case: two attempts in total.
    var attemptsRemaining = 2;
    while (attemptsRemaining > 0)
    {
        // Consume the attempt *before* running so that the final attempt is made
        // with allowRetry == false. Without this decrement the loop would retry
        // forever whenever the error keeps reproducing.
        attemptsRemaining--;
        var allowRetry = attemptsRemaining > 0;

        if (await RunCheck(shouldDeserializeTraces, expectedExitCode, allowRetry))
        {
            // all good
            return;
        }

        // Only reached when a retry is about to happen; record it so the retry rate is visible.
        await ErrorHelpers.SendMetric(Output, "dd_trace_dotnet.ci.tests.retries", EnvironmentHelper);
    }

    // With allowRetry == false RunCheck either returns true or throws, so we should never get here.
    throw new InvalidOperationException("Unreachable, should throw in RunCheck or return true");
}
73+
74+
private async Task<bool> RunCheck(bool shouldDeserializeTraces, int expectedExitCode, bool allowRetry)
5775
{
5876
var applicationPath = EnvironmentHelper.GetSampleApplicationPath().Replace(@"\\", @"\");
5977
Output.WriteLine($"Application path: {applicationPath}");
@@ -106,7 +124,7 @@ protected async Task CheckForSmoke(bool shouldDeserializeTraces = true, int expe
106124
if (AssumeSuccessOnTimeout)
107125
{
108126
Assert.True(true, "No smoke is a good sign for this case, even on timeout.");
109-
return;
127+
return true;
110128
}
111129
else
112130
{
@@ -136,9 +154,18 @@ protected async Task CheckForSmoke(bool shouldDeserializeTraces = true, int expe
136154

137155
ErrorHelpers.CheckForKnownSkipConditions(Output, result.ExitCode, result.StandardError, EnvironmentHelper);
138156

157+
// TODO: Investigate and fix this!
158+
if (allowRetry && Regex.IsMatch(result.StandardError, @"ptrace\(ATTACH, \d+\) FAILED Operation not permitted"))
159+
{
160+
// We have a "known" issue with getting errors like 'ptrace(ATTACH, 1234) FAILED Operation not permitted'
161+
// It causes flake, happens during shutdown, but affects all runtimes.
162+
// We don't have a good story for it now, so do a single retry...
163+
Output.WriteLine($"Received 'ptrace(ATTACH, *) FAILED Operation not permitted' in standard error. Retrying once.");
164+
return false;
165+
}
139166
#if !NET5_0_OR_GREATER
140167
if (result.StandardOutput.Contains("App completed successfully")
141-
&& Regex.IsMatch(result.StandardError, @"open\(/proc/\d+/mem\) FAILED 2 \(No such file or directory\)"))
168+
&& Regex.IsMatch(result.StandardError, @"open\(/proc/\d+/mem\) FAILED 2 \(No such file or directory\)"))
142169
{
143170
// The above message is the last thing set before we exit.
144171
// We can still get flake on shutdown (which we can't isolate), but for some reason
@@ -153,6 +180,8 @@ protected async Task CheckForSmoke(bool shouldDeserializeTraces = true, int expe
153180
{
154181
Assert.True(string.IsNullOrEmpty(result.StandardError), $"Expected no errors in smoke test: {result.StandardError}");
155182
}
183+
184+
return true;
156185
}
157186
}
158187
}

0 commit comments

Comments
 (0)