Skip to content

Commit 5fc7b11

Browse files
committed
Tau - add killswitch
Now that we have changed Tau's boot scripts to use the release scripts from Elixir, we can no longer get the OS pid from the spawned process. The pid is necessary so that we can explicitly kill Tau when we shut down or when something goes wrong. We attempted to circumvent this by explicitly passing the pid from Tau to the Daemon via a TCP socket. However, there are still some edge cases where this pid isn't correctly sent/received. To mitigate this, and to add extra reassurance that Tau will properly exit and not turn into a zombie process, we now have an internal kill switch within Tau. This is a simple timer which, when fired, will bring the whole process down. The timer is reset by the Daemon sending an appropriate keepalive OSC message. If no keepalive message is received within 5 seconds, Tau will shut itself down.
1 parent ecf9ced commit 5fc7b11

File tree

3 files changed

+105
-13
lines changed

3 files changed

+105
-13
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
%% Sonic Pi API server process
2+
%% --
3+
%% This file is part of Sonic Pi: http://sonic-pi.net
4+
%% Full project source: https://github.com/sonic-pi-net/sonic-pi
5+
%% License: https://github.com/sonic-pi-net/sonic-pi/blob/main/LICENSE.md
6+
%%
7+
%% Copyright 2021 by Sam Aaron (http://sam.aaron.name/)
8+
%% All rights reserved.
9+
%%
10+
%% Permission is granted for use, copying, modification, and
11+
%% distribution of modified versions of this work as long as this
12+
%% notice is included.
13+
%% ++
14+
15+
-module(tau_keepalive).
16+
17+
-export([start/1, init/1, loop/1]).
18+
19+
-import(tau_server_util,
20+
[log/1, log/2, debug/2, debug/3]).
21+
22+
%% Launch the keepalive process.
%%
%% Spawns a new process that runs init/1 with the given Daemon TCP port
%% number and returns its pid. The process is started with plain spawn,
%% so it is neither linked to nor monitored by the caller.
start(DaemonPortNum) ->
    InitArgs = [DaemonPortNum],
    erlang:spawn(?MODULE, init, InitArgs).
24+
25+
%% Entry point of the keepalive process.
%%
%% Connects to the Daemon on 127.0.0.1:DaemonPortNum, sends it an OSC
%% "/tau_pid" message carrying the value of os:getpid(), arms the
%% 5000 ms kill switch timer and then enters loop/1.
%%
%% The connect result is matched assertively, so a failed connection
%% crashes this process with a badmatch before the kill switch is armed.
init(DaemonPortNum) ->
    log("connecting to Daemon via TCP...~n", []),
    %% Active-mode binary socket; {packet, 4} frames every message with
    %% a 4-byte length header.
    SocketOpts = [binary, {active, true}, {packet, 4}, {keepalive, true}],
    {ok, Socket} = gen_tcp:connect({127,0,0,1}, DaemonPortNum, SocketOpts),
    Pid = os:getpid(),
    Encoded = osc:encode(["/tau_pid", Pid]),
    log("Sending Pid ~p to Daemon...~n", [Pid]),
    gen_tcp:send(Socket, Encoded),
    %% Delivers {timeout, TimerRef, trigger_kill_switch} to this process
    %% unless the timer is cancelled first.
    Timer = erlang:start_timer(5000, self(), trigger_kill_switch),
    log("Waiting for keepalive messages..."),
    loop(Timer).
40+
41+
%% Main receive loop of the keepalive process.
%%
%% KillSwitch is the reference of the currently armed kill switch timer
%% (as returned by erlang:start_timer/3).
%%
%% Handled messages:
%%   * {tcp, _, Bin} decoding to the OSC command "/system/keepalive":
%%     cancel the current timer, arm a fresh 5000 ms one and continue.
%%   * any other TCP payload, or a payload that fails to decode:
%%     log it and continue with the timer untouched.
%%   * {timeout, KillSwitch, trigger_kill_switch} for the *current*
%%     timer: log and call init:stop() to bring the runtime down.
%%   * a timeout carrying any other timer reference: log and ignore it.
%%   * anything else: log and continue.
%%
%% Recursion goes through ?MODULE:loop/1 (fully qualified calls).
loop(KillSwitch) ->
    log("KillSwitch loop ~n", []),
    receive
        {tcp, _Socket, Bin} ->
            try osc:decode(Bin) of
                {cmd, ["/system/keepalive"]} ->
                    log("Received keepalive message from Daemon ~n", []),
                    %% cancel_timer/1 cannot retract a timeout message
                    %% that has already been delivered to our mailbox;
                    %% such stale messages are filtered out below by
                    %% matching on the timer reference.
                    erlang:cancel_timer(KillSwitch),
                    NewKillSwitch = erlang:start_timer(5000, self(), trigger_kill_switch),
                    ?MODULE:loop(NewKillSwitch);
                Other ->
                    log("Unexpected message from Daemon:~p~n", [Other]),
                    ?MODULE:loop(KillSwitch)
            catch
                Class:Term:Trace ->
                    log("keepalive process: Error decoding OSC: ~p~n~p:~p~n~p~n",
                        [Bin, Class, Term, Trace]),
                    ?MODULE:loop(KillSwitch)
            end;
        %% KillSwitch is already bound, so this clause only matches the
        %% timeout of the timer that is currently armed.
        {timeout, KillSwitch, trigger_kill_switch} ->
            log("Tau kill switch activated. Shutting down....", []),
            init:stop();
        %% Timeout from a timer that was replaced after a keepalive had
        %% been received: the switch has been re-armed, so do not shut
        %% down.
        {timeout, _StaleTimer, trigger_kill_switch} ->
            log("Tau keepalive ignoring stale kill switch timeout~n", []),
            ?MODULE:loop(KillSwitch);
        Any ->
            log("Tau keepalive received unexpected message: ~p~n", [Any]),
            ?MODULE:loop(KillSwitch)
    end.

app/server/beam/tau/src/tau_server/tau_server_api.erl

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,9 @@ init(Parent, CueServer, MIDIServer, LinkServer) ->
9090
[erlang:system_info(otp_release), APIPort]),
9191

9292
{ok, APISocket} = gen_udp:open(APIPort, [binary, {ip, loopback}]),
93-
io:format("connecting to Daemon via TCP...~n", []),
94-
{ok, DaemonSocket} = gen_tcp:connect({127,0,0,1}, DaemonPort, [
95-
binary,
96-
{active, true},
97-
{packet, 4},
98-
{keepalive, true}
99-
]),
100-
101-
PidMsg = osc:encode(["/tau_pid", os:getpid()]),
102-
io:format("Sending Pid to Daemon...~n", []),
103-
104-
gen_tcp:send(DaemonSocket, PidMsg),
93+
94+
_KeepAlivePid = tau_keepalive:start(DaemonPort),
95+
10596
%% tell parent we have allocated resources and are up and running
10697
proc_lib:init_ack(Parent, {ok, self()}),
10798

app/server/ruby/bin/daemon.rb

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -548,8 +548,11 @@ def initialize(ports)
548548
spider_port = ports["listen-to-tau"]
549549
daemon_port = ports["daemon-listen-to-tau"]
550550

551+
@osc_out_queue = SizedQueue.new(20)
552+
551553
tau_comms = TCPServer.new "127.0.0.1", daemon_port
552554
osc_decoder = SonicPi::OSC::OscDecode.new
555+
osc_encoder = SonicPi::OSC::OscEncode.new
553556
comms_thread_started = Promise.new
554557

555558
Thread.new do
@@ -561,6 +564,37 @@ def initialize(ports)
561564
client = tau_comms.accept # Wait for a client to connect
562565
Util.log "-----> Connection accepted"
563566

567+
@tau_send_thread = Thread.new do
568+
loop do
569+
begin
570+
osc = @osc_out_queue.pop
571+
client.write([osc.bytesize].pack('N'))
572+
client.write(osc)
573+
rescue StandardError => e
574+
Util.log "Critical Error, sending messages to Tau:"
575+
Util.log "Error Class: #{e.class}"
576+
Util.log "Error Message: #{e.message}"
577+
Util.log "Error Backtrace: #{e.backtrace.inspect}"
578+
end
579+
end
580+
end
581+
582+
@tau_keep_alive_thread = Thread.new do
583+
loop do
584+
begin
585+
osc = osc_encoder.encode_single_message("/system/keepalive")
586+
@osc_out_queue << osc
587+
rescue StandardError => e
588+
Util.log "Critical Error, encoding OSC keepalive message to Tau:"
589+
Util.log "Error Class: #{e.class}"
590+
Util.log "Error Message: #{e.message}"
591+
Util.log "Error Backtrace: #{e.backtrace.inspect}"
592+
ensure
593+
Kernel.sleep 4
594+
end
595+
end
596+
end
597+
564598
recv_osc = lambda do
565599
size_str = client.recvfrom(4, Socket::MSG_WAITALL)[0].chomp
566600
size = size_str.unpack('N')[0]
@@ -569,7 +603,6 @@ def initialize(ports)
569603
osc_decoder.decode_single_message(data_raw)
570604
end
571605

572-
573606
begin
574607
data = recv_osc.call
575608
tau_pid = Integer(data[1][0])
@@ -624,6 +657,8 @@ def process_running?
624657
end
625658

626659
def kill
660+
@tau_send_thread.kill
661+
@tau_keep_alive_thread.kill
627662
begin
628663
@pid = @tau_pid.get(30)
629664
rescue SonicPi::PromiseTimeoutError

0 commit comments

Comments
 (0)