Skip to content

Commit 7387d8e

Browse files
authored
Merge pull request #90 from JuliaCI/tb/macos_wrapper_loop
macOS: Run agent in a loop instead of relying on KeepAlive
2 parents 764d662 + 831f72e commit 7387d8e

File tree

3 files changed

+71
-41
lines changed

3 files changed

+71
-41
lines changed

common/coredump_config.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ function set_coredump_pattern(pattern::AbstractString)
4141
label = "org.julialang.buildkite.corefile"
4242
config = LaunchctlConfig(
4343
label,
44-
[Sys.which("sysctl"), "-w", "kern.corefile=$(pattern)"];
45-
keepalive=false,
44+
[Sys.which("sysctl"), "-w", "kern.corefile=$(pattern)"]
4645
)
4746
mktempdir() do dir
4847
open(joinpath(dir, "config"); write=true) do io

common/mac_launchctl_config.jl

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ struct LaunchctlConfig
1717
logpath::Union{String,Nothing}
1818

1919
# Whether the job should be restarted after it dies
20-
keepalive::Union{Bool,Nothing}
20+
keepalive::Union{NamedTuple,Bool,Nothing}
2121

22-
function LaunchctlConfig(label, target; env = Dict{String,String}(), cwd = nothing, logpath = nothing, keepalive = true)
22+
function LaunchctlConfig(label, target; env = Dict{String,String}(), cwd = nothing, logpath = nothing, keepalive = nothing)
2323
return new(label, target, env, cwd, logpath, keepalive)
2424
end
2525
end
@@ -41,13 +41,13 @@ function write(io::IO, config::LaunchctlConfig)
4141
# Target process to launch
4242
print(io, """
4343
<key>ProgramArguments</key>
44-
<array>
44+
<array>
4545
""")
4646
for word in config.target
47-
println(io, " <string>$(word)</string>")
47+
println(io, " <string>$(word)</string>")
4848
end
4949
println(io, """
50-
</array>
50+
</array>
5151
""")
5252

5353
# We always want these things to be run at load
@@ -57,7 +57,22 @@ function write(io::IO, config::LaunchctlConfig)
5757
""")
5858

5959
# If we've been asked to print a keepalive
60-
if config.keepalive !== nothing
60+
if config.keepalive isa NamedTuple || config.keepalive isa Dict
61+
print(io, """
62+
<key>KeepAlive</key>
63+
<dict>
64+
""")
65+
for k in keys(config.keepalive)
66+
v = config.keepalive[k]
67+
print(io, """
68+
<key>$k</key>
69+
<$v />
70+
""")
71+
end
72+
println(io, """
73+
</dict>
74+
""")
75+
elseif config.keepalive !== nothing
6176
println(io, """
6277
<key>KeepAlive</key>
6378
<$(config.keepalive) />

macos-seatbelt/common.jl

Lines changed: 49 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -174,58 +174,74 @@ function generate_launchctl_script(io::IO, brg::BuildkiteRunnerGroup;
174174
print(w_io, """
175175
#!/bin/bash
176176
177-
# Cleanup host paths to protect against stale state leaking
177+
# macOS doesn't like services restarting all the time,
178+
# so keep launching agents until we're asked to quit.
179+
while true; do
180+
181+
# Cleanup host paths to protect against stale state leaking
178182
""")
179183

180184
for path in host_paths_to_cleanup(temp_path, cache_path)
181-
println(w_io, "chmod -R u+w $(path)")
182-
println(w_io, "rm -rf $(path)")
185+
println(w_io, " chmod -R u+w $(path)")
186+
println(w_io, " rm -rf $(path)")
183187
end
184188

185189
print(w_io, """
186-
# Create host paths that must exist
190+
# Create host paths that must exist
187191
""")
188192
for path in host_paths_to_create(temp_path, cache_path)
189-
println(w_io, "mkdir -p $(path)")
193+
println(w_io, " mkdir -p $(path)")
190194
end
191195

192196
println(w_io, """
193-
# Copy secrets into cache directory, which will be deleted by agent environment hook
194-
rm -rf $(secrets_dst_path)
195-
cp -Ra $(secrets_src_path) $(secrets_dst_path)
196-
197-
# Invoke agent inside of sandbox
198-
sandbox-exec -f $(sb_path) $(agent_path) start \\
199-
--disconnect-after-job \\
200-
--sockets-path=$(temp_path) \\
201-
--hooks-path=$(hooks_path) \\
202-
--build-path=$(cache_path)/build \\
203-
--experiment=resolve-commit-after-checkout \\
204-
--git-mirrors-path=$(cache_path)/repos \\
205-
--tags=$(join(tags_with_queues, ",")) \\
206-
--name=$(agent_name)
197+
# Copy secrets into cache directory, which will be deleted by agent environment hook
198+
rm -rf $(secrets_dst_path)
199+
cp -Ra $(secrets_src_path) $(secrets_dst_path)
200+
201+
# Invoke agent inside of sandbox
202+
sandbox-exec -f $(sb_path) $(agent_path) start \\
203+
--disconnect-after-job \\
204+
--sockets-path=$(temp_path) \\
205+
--hooks-path=$(hooks_path) \\
206+
--build-path=$(cache_path)/build \\
207+
--experiment=resolve-commit-after-checkout \\
208+
--git-mirrors-path=$(cache_path)/repos \\
209+
--tags=$(join(tags_with_queues, ",")) \\
210+
--name=$(agent_name)
211+
ret=\$?
212+
echo "Agent exited with status \$ret"
207213
""")
208214

209215
print(w_io, """
210-
# Cleanup host paths
216+
# Cleanup host paths
211217
""")
212218
for path in host_paths_to_cleanup(temp_path, cache_path)
213-
println(w_io, "chmod -R u+w $(path)")
214-
println(w_io, "rm -rf $(path)")
219+
println(w_io, " chmod -R u+w $(path)")
220+
println(w_io, " rm -rf $(path)")
215221
end
216222

217223
print(w_io, """
218-
# Reboot the machine if we've been running for more than 24 hours
219-
ts_boot=\$(sysctl -n kern.boottime | cut -d" " -f4 | cut -d"," -f1)
220-
ts_now=\$(date +%s)
221-
222-
if ((ts_now - ts_boot > 24*60*60)); then
223-
sudo -n /sbin/shutdown -r now
224+
# Reboot the machine if we've been running for more than 24 hours
225+
ts_boot=\$(sysctl -n kern.boottime | cut -d" " -f4 | cut -d"," -f1)
226+
ts_now=\$(date +%s)
227+
if ((ts_now - ts_boot > 24*60*60)); then
228+
echo "Rebooting machine after 24 hours of uptime"
229+
sudo -n /sbin/shutdown -r now
230+
break
231+
fi
232+
""")
224233

225-
# Give the system the time to shut down,
226-
# preventing a new job from getting picked up
227-
sleep 30
228-
fi
234+
print(w_io, """
235+
# If the agent returned with code 255, that indicated a
236+
# graceful termination, so exit the loop
237+
if [ \$ret -eq 255 ]; then
238+
echo "Stopping service after graceful termination"
239+
break
240+
fi
241+
done
242+
243+
# Return success, which will result in the service _not_ restarting
244+
exit 0
229245
""")
230246
end
231247

@@ -236,7 +252,7 @@ function generate_launchctl_script(io::IO, brg::BuildkiteRunnerGroup;
236252
env = build_seatbelt_env(temp_path, cache_path),
237253
cwd = joinpath(cache_path, "build"),
238254
logpath = joinpath(cache_path, "agent.log"),
239-
keepalive = true,
255+
keepalive = (; SuccessfulExit=false),
240256
)
241257
write(io, lctl_config)
242258
end

0 commit comments

Comments
 (0)