Skip to content

Commit 5f1cb04

Browse files
Better testing.
1 parent e9c0edd commit 5f1cb04

File tree

3 files changed

+86
-74
lines changed

3 files changed

+86
-74
lines changed

fixtures/async/container/a_container.rb

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -246,6 +246,91 @@ module Container
246 246
expect(container.statistics).to have_attributes(failures: be > 0)
247 247
end
248 248
end
249+
250+
with "broken children" do
251+
it "can handle children that ignore termination with SIGKILL fallback" do
252+
# Test behavior that works for both processes (signals) and threads (exceptions)
253+
container.spawn(restart: false) do |instance|
254+
instance.ready!
255+
256+
# Ignore termination attempts in a way appropriate to the container type
257+
if container.class.multiprocess?
258+
# For multiprocess containers - ignore signals
259+
Signal.trap(:INT) {}
260+
Signal.trap(:TERM) {}
261+
while true
262+
sleep(0.1)
263+
end
264+
else
265+
# For threaded containers - ignore exceptions
266+
while true
267+
begin
268+
sleep(0.1)
269+
rescue Async::Container::Interrupt, Async::Container::Terminate
270+
# Ignore termination attempts
271+
end
272+
end
273+
end
274+
end
275+
276+
container.wait_until_ready
277+
278+
# Try to stop with a very short timeout to force escalation
279+
start_time = Time.now
280+
container.stop(0.1) # Very short timeout
281+
end_time = Time.now
282+
283+
# Should stop successfully via SIGKILL/thread termination
284+
expect(container.size).to be == 0
285+
286+
# Should not hang - escalation should work
287+
expect(end_time - start_time).to be < 2.0
288+
end
289+
290+
it "can handle unresponsive children that close pipes but don't exit" do
291+
container.spawn(restart: false) do |instance|
292+
instance.ready!
293+
294+
# Close communication pipe to simulate hung process:
295+
begin
296+
if instance.respond_to?(:out)
297+
instance.out.close if instance.out && !instance.out.closed?
298+
end
299+
rescue
300+
# Ignore close errors.
301+
end
302+
303+
# Become unresponsive:
304+
if container.class.multiprocess?
305+
# For multiprocess containers - ignore signals and close file descriptors:
306+
Signal.trap(:INT) {}
307+
Signal.trap(:TERM) {}
308+
(4..256).each do |fd|
309+
begin
310+
IO.for_fd(fd).close
311+
rescue
312+
# Ignore errors
313+
end
314+
end
315+
loop {} # Tight loop
316+
else
317+
# For threaded containers - just become unresponsive
318+
loop {} # Tight loop, no exception handling
319+
end
320+
end
321+
322+
container.wait_until_ready
323+
324+
# Should not hang even with unresponsive children
325+
start_time = Time.now
326+
container.stop(1.0)
327+
end_time = Time.now
328+
329+
expect(container.size).to be == 0
330+
# Should complete reasonably quickly via hang prevention
331+
expect(end_time - start_time).to be < 5.0
332+
end
333+
end
249 334
end
250 335
end
251 336
end

lib/async/container/group.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ def stop(graceful = true, interrupt_timeout: 1, terminate_timeout: 1)
165 165
end
166 166

167 167
Console.debug(self, "Stopping all processes...", interrupt_timeout: interrupt_timeout, terminate_timeout: terminate_timeout)
168-
168+
169 169
# If a timeout is specified, interrupt the children first:
170 170
if interrupt_timeout
171 171
clock = Async::Clock.start

test/async/container/forked.rb

Lines changed: 0 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -68,77 +68,4 @@
68 68
it "should be multiprocess" do
69 69
expect(subject).to be(:multiprocess?)
70 70
end
71-
72-
it "can handle children that ignore SIGTERM with SIGKILL fallback" do
73-
# Create a child that ignores SIGINT and SIGTERM
74-
container.spawn(restart: false) do |instance|
75-
# Trap both SIGINT and SIGTERM to ignore them (like the example)
76-
Signal.trap(:INT) {}
77-
Signal.trap(:TERM) {}
78-
79-
instance.ready!
80-
81-
# Infinite loop that can only be stopped by SIGKILL
82-
while true
83-
sleep(0.1)
84-
end
85-
end
86-
87-
container.wait_until_ready
88-
89-
# Try to stop with a very short timeout
90-
# This should first try SIGINT, then SIGTERM, then fall back to SIGKILL
91-
start_time = Time.now
92-
container.stop(0.1) # 100ms timeout - very short
93-
end_time = Time.now
94-
95-
# The container should stop successfully even though the child ignored signals
96-
expect(container.size).to be == 0
97-
98-
# It should not take too long (should not hang waiting for SIGTERM)
99-
# Allow some buffer time for the SIGKILL fallback mechanism
100-
expect(end_time - start_time).to be < 2.0
101-
end
102-
103-
it "can handle unresponsive children that close pipes but don't exit" do
104-
# Simulate a production hang scenario where a child closes file descriptors
105-
# but doesn't actually exit, becoming unresponsive
106-
container.spawn(restart: false) do |instance|
107-
# Ignore all signals
108-
Signal.trap(:INT) {}
109-
Signal.trap(:TERM) {}
110-
111-
instance.ready!
112-
113-
# Close all file descriptors above 3 (like the production hang scenario)
114-
# This will close the notify pipe, making the parent think we've "exited"
115-
(4..256).each do |fd|
116-
begin
117-
IO.for_fd(fd).close
118-
rescue
119-
# Ignore errors for non-existent file descriptors
120-
end
121-
end
122-
123-
# Now become unresponsive (infinite loop without yielding)
124-
while true
125-
# Tight loop without sleep - process is unresponsive but still alive
126-
end
127-
end
128-
129-
container.wait_until_ready
130-
131-
# This should not hang - even with unresponsive processes, stop should work
132-
start_time = Time.now
133-
container.stop(2.0) # Give it a reasonable timeout for testing
134-
end_time = Time.now
135-
136-
# Container should stop successfully
137-
expect(container.size).to be == 0
138-
139-
# Should complete within the child's individual timeout + buffer (30s + 5s)
140-
# The process is so unresponsive it needs the individual Child timeout to kill it
141-
# This proves the hang prevention works - without it, this would hang forever
142-
expect(end_time - start_time).to be < 35.0
143-
end
144 71
end if Async::Container.fork?

0 commit comments

Comments
 (0)