Skip to content

Commit 8f1b9fa

Browse files
committed
(PUP-9998) Check agent disabled status after sleeping
Puppet's disabled status checking was subject to race conditions. It checked the status once at the beginning of its run, but between the time it checked and when the agent woke up from splay, someone could disable the agent. This commit adds additional checks in situations where we may have slept for arbitrary amounts of time. Note when running onetime, the `Puppet::Agent` class handles splaying. Otherwise, splay is handled by the job scheduler, before `Puppet::Agent#run` is called and the job scheduler passes `splay: false` as an argument to the `run` method.
1 parent b6f5ca6 commit 8f1b9fa

File tree

2 files changed

+53
-13
lines changed

2 files changed

+53
-13
lines changed

lib/puppet/agent.rb

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,25 @@ def needing_restart?
3838
# Perform a run with our client.
3939
def run(client_options = {})
4040
if disabled?
41-
Puppet.notice _("Skipping run of %{client_class}; administratively disabled (Reason: '%{disable_message}');\nUse 'puppet agent --enable' to re-enable.") % { client_class: client_class, disable_message: disable_message }
41+
log_disabled_message
4242
return
4343
end
4444

4545
result = nil
4646
wait_for_lock_deadline = nil
4747
block_run = Puppet::Application.controlled_run do
48-
# splay may sleep for awhile!
49-
splay(client_options.fetch(:splay, Puppet[:splay]))
48+
# splay may sleep for awhile when running onetime! If not onetime, then
49+
# the job scheduler splays (only once) so that agents assign themselves a
50+
# slot within the splay interval.
51+
do_splay = client_options.fetch(:splay, Puppet[:splay])
52+
if do_splay
53+
splay(do_splay)
54+
55+
if disabled?
56+
log_disabled_message
57+
break
58+
end
59+
end
5060

5161
# waiting for certs may sleep for awhile depending on onetime, waitforcert and maxwaitforcert!
5262
# this needs to happen before forking so that if we fail to obtain certs and try to exit, then
@@ -59,14 +69,19 @@ def run(client_options = {})
5969
begin
6070
# lock may sleep for awhile depending on waitforlock and maxwaitforlock!
6171
lock do
62-
# NOTE: Timeout is pretty heinous as the location in which it
63-
# throws an error is entirely unpredictable, which means that
64-
# it can interrupt code blocks that perform cleanup or enforce
65-
# sanity. The only thing a Puppet agent should do after this
66-
# error is thrown is die with as much dignity as possible.
67-
Timeout.timeout(Puppet[:runtimeout], RunTimeoutError) do
68-
Puppet.override(ssl_context: ssl_context) do
69-
client.run(client_args)
72+
if disabled?
73+
log_disabled_message
74+
nil
75+
else
76+
# NOTE: Timeout is pretty heinous as the location in which it
77+
# throws an error is entirely unpredictable, which means that
78+
# it can interrupt code blocks that perform cleanup or enforce
79+
# sanity. The only thing a Puppet agent should do after this
80+
# error is thrown is die with as much dignity as possible.
81+
Timeout.timeout(Puppet[:runtimeout], RunTimeoutError) do
82+
Puppet.override(ssl_context: ssl_context) do
83+
client.run(client_args)
84+
end
7085
end
7186
end
7287
end
@@ -88,8 +103,7 @@ def run(client_options = {})
88103
end
89104
rescue RunTimeoutError => detail
90105
Puppet.log_exception(detail, _("Execution of %{client_class} did not complete within %{runtimeout} seconds and was terminated.") %
91-
{client_class: client_class,
92-
runtimeout: Puppet[:runtimeout]})
106+
{client_class: client_class, runtimeout: Puppet[:runtimeout]})
93107
nil
94108
rescue StandardError => detail
95109
Puppet.log_exception(detail, _("Could not run %{client_class}: %{detail}") % { client_class: client_class, detail: detail })
@@ -155,4 +169,8 @@ def wait_for_certificates(options)
155169
sm = Puppet::SSL::StateMachine.new(waitforcert: waitforcert, onetime: Puppet[:onetime])
156170
sm.ensure_client_certificate
157171
end
172+
173+
def log_disabled_message
174+
Puppet.notice _("Skipping run of %{client_class}; administratively disabled (Reason: '%{disable_message}');\nUse 'puppet agent --enable' to re-enable.") % { client_class: client_class, disable_message: disable_message }
175+
end
158176
end

spec/unit/agent_spec.rb

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ def controlled_run(&block)
103103
end
104104

105105
it "should splay" do
106+
Puppet[:splay] = true
107+
106108
expect(@agent).to receive(:splay)
107109

108110
@agent.run
@@ -185,6 +187,26 @@ def controlled_run(&block)
185187
expect(@agent.run).to eq(:result)
186188
end
187189

190+
it "should check if it's disabled after splaying and log a message" do
191+
Puppet[:splay] = true
192+
Puppet[:splaylimit] = '5s'
193+
Puppet[:onetime] = true
194+
195+
expect(@agent).to receive(:disabled?).and_return(false, true)
196+
197+
allow(Puppet).to receive(:notice).and_call_original
198+
expect(Puppet).to receive(:notice).with(/Skipping run of .*; administratively disabled.*/)
199+
@agent.run
200+
end
201+
202+
it "should check if it's disabled after acquiring the lock and log a message" do
203+
expect(@agent).to receive(:disabled?).and_return(false, true)
204+
205+
allow(Puppet).to receive(:notice).and_call_original
206+
expect(Puppet).to receive(:notice).with(/Skipping run of .*; administratively disabled.*/)
207+
@agent.run
208+
end
209+
188210
describe "and a puppet agent is already running" do
189211
before(:each) do
190212
allow_any_instance_of(Object).to receive(:sleep)

0 commit comments

Comments
 (0)