Skip to content

Commit 8c8619a

Browse files
author
Scott Arbeit
committed
Improve CI readiness diagnostics
1 parent 4722a7c commit 8c8619a

File tree

1 file changed

+182
-3
lines changed

1 file changed

+182
-3
lines changed

src/Grace.Server.Tests/AspireTestHost.fs

Lines changed: 182 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ module AspireTestHost =
4747

4848
/// Selects the timeout to use for the current environment: returns `ci` when
/// `isCi` is true and `local` otherwise.
let private getTimeout (local: TimeSpan) (ci: TimeSpan) =
    match isCi with
    | true -> ci
    | false -> local
4949

50-
let private defaultWaitTimeout = getTimeout (TimeSpan.FromMinutes(5.0)) (TimeSpan.FromMinutes(10.0))
50+
let private defaultWaitTimeout = getTimeout (TimeSpan.FromMinutes(5.0)) (TimeSpan.FromMinutes(3.0))
5151

5252
let private getResource (app: DistributedApplication) (resourceName: string) =
5353
let model = app.Services.GetRequiredService<DistributedApplicationModel>()
@@ -168,6 +168,162 @@ module AspireTestHost =
168168
return lines |> Seq.toList
169169
}
170170

171+
/// Outcome of running an external process via `runProcessAsync`.
type private ProcessResult =
172+
{
173+
/// Exit code of the process; `None` when the process did not run to completion
/// (start failure, timeout, or an exception while running it).
ExitCode: int option
174+
/// Text captured from the process's standard output; empty when nothing was captured.
StdOut: string
175+
/// Text captured from the process's standard error; empty when nothing was captured.
StdErr: string
176+
/// True when the process was still running after the allowed wait time.
TimedOut: bool
177+
/// Failure description (start failure or exception message); `None` when no error occurred.
Error: string option
178+
}
179+
180+
/// Runs `fileName` with `arguments` as a child process (no shell, no window) with
/// stdout and stderr redirected, and waits up to `timeout` for it to exit.
///
/// Returns a `ProcessResult` instead of throwing:
///   - exited in time: `ExitCode = Some code` plus the complete stdout/stderr text;
///   - still running after `timeout`: the process tree is killed and `TimedOut = true`;
///   - could not be started, or any exception was raised: `Error = Some message`.
let private runProcessAsync (fileName: string) (arguments: string) (timeout: TimeSpan) =
    task {
        try
            let startInfo = ProcessStartInfo(fileName, arguments)
            startInfo.RedirectStandardOutput <- true
            startInfo.RedirectStandardError <- true
            startInfo.UseShellExecute <- false
            startInfo.CreateNoWindow <- true

            use proc = new Process()
            proc.StartInfo <- startInfo

            if not (proc.Start()) then
                return { ExitCode = None; StdOut = ""; StdErr = ""; TimedOut = false; Error = Some "Failed to start process." }
            else
                // Start draining both pipes BEFORE waiting for exit. If the reads only begin
                // after the process has exited, a child that writes more than the OS pipe
                // buffer blocks on write, never exits, and is misreported as a timeout.
                // Reading both streams concurrently also avoids the stdout/stderr cross-deadlock.
                let stdOutTask = proc.StandardOutput.ReadToEndAsync()
                let stdErrTask = proc.StandardError.ReadToEndAsync()

                let waitTask = proc.WaitForExitAsync()
                let! completed = Task.WhenAny(waitTask, Task.Delay(timeout))

                if completed <> waitTask then
                    // Best effort: the process may already have exited between the timeout
                    // firing and the kill, in which case Kill throws and there is nothing to do.
                    try
                        proc.Kill(true)
                    with
                    | _ -> ()

                    return { ExitCode = None; StdOut = ""; StdErr = ""; TimedOut = true; Error = None }
                else
                    // The process has exited, so both reads complete once they reach end-of-stream.
                    let! stdOut = stdOutTask
                    let! stdErr = stdErrTask

                    return
                        {
                            ExitCode = Some proc.ExitCode
                            StdOut = stdOut
                            StdErr = stdErr
                            TimedOut = false
                            Error = None
                        }
        with
        | ex ->
            return { ExitCode = None; StdOut = ""; StdErr = ""; TimedOut = false; Error = Some ex.Message }
    }
221+
222+
/// Formats the last `maxLines` entries of `lines` under `label`.
///
/// Returns "<label>: <no logs captured>" when the selected tail is empty or
/// whitespace-only; otherwise the label followed by the tail, one entry per line.
let private formatLogTail (label: string) (lines: string list) (maxLines: int) =
    // Clamp the number of kept lines to [0, lines.Length]; a non-positive maxLines keeps nothing.
    let keepCount = max 0 (min maxLines lines.Length)
    let tailText = String.Join(Environment.NewLine, List.skip (lines.Length - keepCount) lines)

    match String.IsNullOrWhiteSpace tailText with
    | true -> $"{label}: <no logs captured>"
    | false -> $"{label}:{Environment.NewLine}{tailText}"
234+
235+
/// Collects a log snapshot for every resource in the application's
/// `DistributedApplicationModel`: the last 50 log lines per resource, or a one-line
/// failure note when fetching that resource's logs throws. The per-resource
/// sections are joined with newlines into a single string.
let private getResourceLogSnapshotAsync (app: DistributedApplication) =
    task {
        let appModel = app.Services.GetRequiredService<DistributedApplicationModel>()

        // One capture task per resource, all started up front so they run concurrently.
        let captures =
            [|
                for resource in appModel.Resources ->
                    task {
                        let resourceName = resource.Name
                        let header = $"[{resourceName}]"

                        try
                            let! captured = getResourceLogsAsync app resourceName
                            return formatLogTail header captured 50
                        with
                        | ex ->
                            // A failure for one resource must not lose the other snapshots.
                            return $"{header}: failed to capture logs ({ex.Message})"
                    }
            |]

        let! sections = Task.WhenAll(captures)
        return String.Join(Environment.NewLine, sections)
    }
256+
257+
/// Renders a one-or-more-line description of an unsuccessful `ProcessResult`.
///
/// Precedence: a timeout is reported first, then a recorded error message, and
/// otherwise the exit code ("<unknown>" when absent) followed by any non-blank
/// stdout/stderr text.
let private formatProcessFailure (label: string) (result: ProcessResult) =
    let hasText (text: string) = not (String.IsNullOrWhiteSpace text)

    // Produces a titled section for a stream, or nothing when the stream is blank.
    let section (title: string) (text: string) =
        if hasText text then
            [ $"{title}:{Environment.NewLine}{text.TrimEnd()}" ]
        else
            []

    match result with
    | { TimedOut = true } -> $"{label} timed out."
    | { Error = Some errorMessage } -> $"{label} failed: {errorMessage}"
    | _ ->
        let exitCode =
            match result.ExitCode with
            | Some code -> string code
            | None -> "<unknown>"

        let details =
            (section "stdout" result.StdOut @ section "stderr" result.StdErr)
            |> String.concat Environment.NewLine

        if hasText details then
            $"{label} exited with {exitCode}.{Environment.NewLine}{details}"
        else
            $"{label} exited with {exitCode}."
279+
280+
/// Best-effort Docker diagnostics: lists all containers with `docker ps -a` and, for
/// up to the first 10 of them, captures the last 200 log lines with `docker logs`.
///
/// Returns a human-readable string. A failing or timed-out `docker` invocation is
/// described in the returned text (via `formatProcessFailure`) rather than raised.
let private tryGetDockerDiagnosticsAsync () =
    task {
        let! psResult = runProcessAsync "docker" "ps -a --format \"{{.ID}} {{.Names}}\"" (TimeSpan.FromSeconds(10.0))

        if psResult.TimedOut || psResult.ExitCode <> Some 0 || psResult.Error.IsSome then
            return formatProcessFailure "Docker ps" psResult
        else
            let lines =
                psResult.StdOut.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries)

            if lines.Length = 0 then
                return "Docker ps: no containers."
            else
                // Each line is "<id> <name>"; split on the first space only.
                let containers =
                    lines
                    |> Seq.choose (fun line ->
                        let parts = line.Split([| ' ' |], 2, StringSplitOptions.RemoveEmptyEntries)

                        if parts.Length = 0 then
                            None
                        else
                            let containerId = parts[0]
                            let name = if parts.Length > 1 then parts[1] else "<unknown>"
                            Some(containerId, name))
                    |> Seq.truncate 10 // cap the number of `docker logs` invocations
                    |> Seq.toArray

                let tasks =
                    containers
                    |> Array.map (fun (containerId, name) ->
                        task {
                            let! logResult = runProcessAsync "docker" $"logs --tail 200 {containerId}" (TimeSpan.FromSeconds(15.0))

                            if logResult.TimedOut || logResult.ExitCode <> Some 0 || logResult.Error.IsSome then
                                return formatProcessFailure $"Docker logs ({name})" logResult
                            else
                                // `docker logs` relays the container's stdout on stdout and its
                                // stderr on stderr. Many services log to stderr, so both streams
                                // must be included or those containers would look empty.
                                let logText =
                                    [ logResult.StdOut; logResult.StdErr ]
                                    |> List.filter (fun text -> not (String.IsNullOrWhiteSpace text))
                                    |> List.map (fun text -> text.TrimEnd())
                                    |> String.concat Environment.NewLine

                                if String.IsNullOrWhiteSpace logText then
                                    return $"Docker logs ({name}): <empty>"
                                else
                                    return $"Docker logs ({name}):{Environment.NewLine}{logText}"
                        })

                let! logBlocks = Task.WhenAll(tasks)

                // The summary lists every container, including those beyond the 10-log cap.
                let containersSummary = String.Join(Environment.NewLine, lines)
                return $"Docker containers:{Environment.NewLine}{containersSummary}{Environment.NewLine}{String.Join(Environment.NewLine, logBlocks)}"
    }
326+
171327
let private waitForResourceHealthyAsync
172328
(notificationService: ResourceNotificationService)
173329
(app: DistributedApplication)
@@ -359,7 +515,7 @@ module AspireTestHost =
359515

360516
use client = new CosmosClient(connectionString, options)
361517
let sw = Stopwatch.StartNew()
362-
let timeout = getTimeout (TimeSpan.FromMinutes(3.0)) (TimeSpan.FromMinutes(6.0))
518+
let timeout = getTimeout (TimeSpan.FromMinutes(3.0)) (TimeSpan.FromMinutes(3.0))
363519
let perCallTimeout = TimeSpan.FromSeconds(10.0)
364520
let mutable lastError = String.Empty
365521
let mutable attempt = 0
@@ -578,7 +734,30 @@ module AspireTestHost =
578734
let client = app.CreateHttpClient(graceServerResourceName, endpointName)
579735
client.Timeout <- getTimeout (TimeSpan.FromSeconds(100.0)) (TimeSpan.FromMinutes(5.0))
580736

581-
do! waitForGraceServerHttpReadyAsync client cts.Token
737+
try
738+
do! waitForGraceServerHttpReadyAsync client cts.Token
739+
with
740+
| ex ->
741+
let details = describeResourceState notificationService graceServerResourceName
742+
let! graceResourceLogs = getResourceLogsAsync app graceServerResourceName
743+
let graceResourceLogDetails = formatLogTail "Grace.Server resource logs" graceResourceLogs 50
744+
let envDetails = formatEnvDiagnostics env
745+
746+
let graceFileLog =
747+
env
748+
|> Map.tryFind Constants.EnvironmentVariables.GraceLogDirectory
749+
|> Option.bind tryGetLatestLogTail
750+
|> Option.defaultValue "No Grace.Server log file captured."
751+
752+
let! aspireLogSnapshot = getResourceLogSnapshotAsync app
753+
let! dockerDiagnostics = tryGetDockerDiagnosticsAsync ()
754+
755+
raise (
756+
Exception(
757+
$"Grace-server HTTP readiness failed. {details}{Environment.NewLine}Error: {ex.Message}{Environment.NewLine}Env: {envDetails}{Environment.NewLine}{graceResourceLogDetails}{Environment.NewLine}{graceFileLog}{Environment.NewLine}{aspireLogSnapshot}{Environment.NewLine}{dockerDiagnostics}",
758+
ex
759+
)
760+
)
582761

583762
let diagnosticsPath = Path.Combine(Path.GetTempPath(), "grace-server-tests.host.log")
584763

0 commit comments

Comments
 (0)