Skip to content

Commit a520ebd

Browse files
committed
Add mute filter process to mute user input based on specific strategies
1 parent 1951c37 commit a520ebd

File tree

3 files changed

+359
-0
lines changed

3 files changed

+359
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ All notable changes to this project will be documented in this file. This change
2323
- **Examples**: Added example of local microphone AI agent with interruption capability. See [here](./examples/src/simulflow_examples/local_w_interruption_support.clj)
2424
- **Transport In (twilio, async and microphone)**: Added muting support. When transports receive `mute-input-start`, the input isn't processed further and processing resumes when `mute-input-stop` frame is received. Useful for guided conversations where you don't want the user to speak over the bot, during initial greetings or during function calls.
2525
- **User Context Aggregator**: Now emits `frame/llm-tool-call-request` when the LLM requests a tool call. This frame can be used to make the agent say something while the tool handler is called, or to trigger a mute filter while the tool call executes.
26+
- **[Mute filter processor](./src/simulflow/filters/mute.clj)**: Added mute processor that mutes user input based on specific strategies
2627

2728
### Changed
2829
- Moved most of the llm logic from [openai processor](./src/simulflow/processors/openai.clj) to a utils folder to be used by multiple processors like [gemini](./src/simulflow/processors/google.clj)

src/simulflow/filters/mute.clj

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
(ns simulflow.filters.mute
2+
"This filter handles muting the user input based on specific strategies:
3+
- A function call is in progress
4+
- Bot's first speech (introductions)
5+
- Bot speech (don't process user speech while bot is speaking)
6+
"
7+
(:require
8+
[clojure.core.async.flow :as flow]
9+
[clojure.set :as set]
10+
[simulflow.frame :as frame]
11+
[simulflow.schema :as schema]))
12+
13+
(def MuteStrategy
  "Enum schema listing every supported mute strategy:

  - `:mute.strategy/first-speech` - mute user during introduction
  - `:mute.strategy/tool-call`    - mute user during bot tool calls
  - `:mute.strategy/bot-speech`   - mute user during all bot speech"
  [:enum
   :mute.strategy/first-speech
   :mute.strategy/tool-call
   :mute.strategy/bot-speech])
21+
22+
(def MuteFilterConfig
  "Map schema for the mute filter configuration.

  Declares a single key, `:mute/strategies`, whose value is a set of
  `MuteStrategy` values."
  [:map
   [:mute/strategies
    {:description "Collection of strategies used to trigger muting the user input."}
    [:set MuteStrategy]]])
27+
28+
(def describe
  "Static description of the processor: its input channels (`:in`, `:sys-in`),
  its output channel (`:sys-out`) and its parameters, the latter derived from
  `MuteFilterConfig` via `schema/->describe-parameters`."
  {:ins    {:in     "Channel for normal frames"
            :sys-in "Channel for system frames"}
   :outs   {:sys-out "Channel for mute-start/stop frames"}
   :params (schema/->describe-parameters MuteFilterConfig)})
33+
34+
(defn init
  "Parses `params` against `MuteFilterConfig` with
  `schema/parse-with-defaults` and returns the parsed map unchanged."
  [params]
  (schema/parse-with-defaults MuteFilterConfig params))
38+
39+
(defn transform
40+
[{:keys [mute/strategies ::muted?] :as state} _ msg]
41+
(cond
42+
;; Function call strategy
43+
(and (frame/llm-tool-call-request? msg)
44+
(strategies :mute.strategy/tool-call)
45+
(not muted?))
46+
[(assoc state ::muted? true) (frame/send (frame/mute-input-start))]
47+
48+
(and (frame/llm-tool-call-result? msg)
49+
(strategies :mute.strategy/tool-call)
50+
muted?)
51+
[(assoc state ::muted? false) (frame/send (frame/mute-input-stop))]
52+
53+
;; bot speech & first-speech strategies
54+
(and (frame/bot-speech-start? msg)
55+
(seq (set/intersection strategies #{:mute.strategy/first-speech :mute.strategy/bot-speech}))
56+
(not muted?))
57+
(let [emit-mute-first-speech? (and (strategies :mute.strategy/first-speech)
58+
(not (true? (::first-speech-started? state))))
59+
emit-mute-bot-speech? (strategies :mute.strategy/bot-speech)
60+
emit-mute? (or emit-mute-first-speech? emit-mute-bot-speech?)
61+
62+
ns (cond-> state
63+
emit-mute-first-speech? (assoc ::first-speech-started? true)
64+
emit-mute? (assoc ::muted? true))]
65+
[ns (when emit-mute? (frame/send (frame/mute-input-start)))])
66+
67+
(and (frame/bot-speech-stop? msg)
68+
(seq (set/intersection strategies #{:mute.strategy/first-speech :mute.strategy/bot-speech}))
69+
muted?)
70+
(let [emit-unmute-first-speech? (and (strategies :mute.strategy/first-speech)
71+
(true? (::first-speech-started? state))
72+
(not (true? (::first-speech-ended? state))))
73+
emit-unmute-bot-speech? (strategies :mute.strategy/bot-speech)
74+
emit-unmute? (or emit-unmute-first-speech? emit-unmute-bot-speech?)
75+
76+
ns (cond-> state
77+
emit-unmute-first-speech? (assoc ::first-speech-ended? true)
78+
emit-unmute? (assoc ::muted? false))]
79+
[ns (when emit-unmute? (frame/send (frame/mute-input-stop)))])
80+
81+
:else
82+
[state]))
83+
84+
(defn processor-fn
  "Multi-arity step function handed to `flow/process`:

  - no args: returns `describe`
  - `[config]`: returns `(init config)`
  - `[state transition]`: returns `state` untouched
  - `[state port frame]`: delegates to `transform`"
  ([] describe)
  ([config] (init config))
  ([state _] state)
  ([state port frame] (transform state port frame)))
90+
91+
;; Flow process created by passing `processor-fn` to `flow/process`.
(def processor (flow/process processor-fn))
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
(ns simulflow.filters.mute-test
2+
(:require
3+
[clojure.test :refer [deftest is testing]]
4+
[simulflow.filters.mute :as mute]
5+
[simulflow.frame :as frame]))
6+
7+
(deftest transform-tool-call-strategy-test
  ;; Fixtures shared by every case: one tool-call request payload and the
  ;; request/result frames built from it.
  (let [tool-call-only #{:mute.strategy/tool-call}
        request        {:role :assistant
                        :tool_calls [{:id "test"
                                      :type :function
                                      :function {:name "test-fn" :arguments "{}"}}]}
        request-frame  (frame/llm-tool-call-request request)
        result-frame   (frame/llm-tool-call-result
                         {:request request
                          :result {:role :tool
                                   :content "Tool executed successfully"
                                   :tool_call_id "test"}})]
    (testing "tool-call strategy mutes on tool call request"
      (let [[after out] (mute/transform {:mute/strategies tool-call-only} :in request-frame)
            sent        (:sys-out out)]
        (is (true? (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-start? (first sent)))))

    (testing "tool-call strategy unmutes on tool call result when muted"
      (let [before      {:mute/strategies tool-call-only ::mute/muted? true}
            [after out] (mute/transform before :in result-frame)
            sent        (:sys-out out)]
        (is (= false (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-stop? (first sent)))))

    (testing "tool-call strategy ignores non-tool frames"
      (let [before      {:mute/strategies tool-call-only}
            [after out] (mute/transform before :in (frame/transcription "hello"))]
        (is (= before after))
        (is (empty? out))))

    (testing "no mute when tool-call strategy not enabled"
      (let [before      {:mute/strategies #{:mute.strategy/bot-speech}}
            [after out] (mute/transform before :in request-frame)]
        (is (= before after))
        (is (empty? out))))

    (testing "tool-call strategy does not mute when already muted"
      (let [before      {:mute/strategies tool-call-only ::mute/muted? true}
            [after out] (mute/transform before :in request-frame)]
        (is (= before after))
        (is (empty? out))))

    (testing "tool-call strategy does not unmute when not muted"
      (let [before      {:mute/strategies tool-call-only ::mute/muted? false}
            [after out] (mute/transform before :in result-frame)]
        (is (= before after))
        (is (empty? out))))))
72+
73+
(deftest transform-bot-speech-strategy-test
  ;; Start/stop frames and the strategy set are shared by every case.
  (let [bot-speech-only #{:mute.strategy/bot-speech}
        start-frame     (frame/bot-speech-start true)
        stop-frame      (frame/bot-speech-stop true)]
    (testing "bot-speech strategy mutes on bot speech start"
      (let [[after out] (mute/transform {:mute/strategies bot-speech-only} :in start-frame)
            sent        (:sys-out out)]
        (is (= true (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-start? (first sent)))))

    (testing "bot-speech strategy unmutes on bot speech stop when muted"
      (let [before      {:mute/strategies bot-speech-only ::mute/muted? true}
            [after out] (mute/transform before :in stop-frame)
            sent        (:sys-out out)]
        (is (= false (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-stop? (first sent)))))

    (testing "no mute when bot-speech strategy not enabled"
      (let [before      {:mute/strategies #{:mute.strategy/tool-call}}
            [after out] (mute/transform before :in start-frame)]
        (is (= before after))
        (is (empty? out))))

    (testing "bot-speech strategy does not mute when already muted"
      (let [before      {:mute/strategies bot-speech-only ::mute/muted? true}
            [after out] (mute/transform before :in start-frame)]
        (is (= before after))
        (is (empty? out))))

    (testing "bot-speech strategy does not unmute when not muted"
      (let [before      {:mute/strategies bot-speech-only ::mute/muted? false}
            [after out] (mute/transform before :in stop-frame)]
        (is (= before after))
        (is (empty? out))))))
110+
111+
(deftest transform-first-speech-strategy-test
  ;; Every case starts from a state with only the first-speech strategy enabled.
  (let [base        {:mute/strategies #{:mute.strategy/first-speech}}
        start-frame (frame/bot-speech-start true)
        stop-frame  (frame/bot-speech-stop true)]
    (testing "first-speech strategy mutes only on first bot speech"
      (let [[after out] (mute/transform base :in start-frame)
            sent        (:sys-out out)]
        (is (= true (::mute/first-speech-started? after)))
        (is (= true (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-start? (first sent)))))

    (testing "first-speech strategy does not mute on second bot speech start"
      (let [before      (assoc base ::mute/first-speech-started? true)
            [after out] (mute/transform before :in start-frame)]
        (is (= before after))
        (is (empty? out))))

    (testing "first-speech strategy unmutes on first bot speech stop when muted"
      (let [before      (assoc base
                               ::mute/first-speech-started? true
                               ::mute/muted? true)
            [after out] (mute/transform before :in stop-frame)
            sent        (:sys-out out)]
        (is (= true (::mute/first-speech-ended? after)))
        (is (= false (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-stop? (first sent)))))

    (testing "first-speech strategy does not unmute on subsequent bot speech stops"
      (let [before      (assoc base
                               ::mute/first-speech-started? true
                               ::mute/first-speech-ended? true
                               ::mute/muted? false)
            [after out] (mute/transform before :in stop-frame)]
        (is (= before after))
        (is (empty? out))))))
149+
150+
(deftest transform-combined-strategies-test
  (let [speech-strategies #{:mute.strategy/first-speech :mute.strategy/bot-speech}
        start-frame       (frame/bot-speech-start true)]
    (testing "first-speech and bot-speech strategies both trigger on first speech"
      (let [[after out] (mute/transform {:mute/strategies speech-strategies} :in start-frame)
            sent        (:sys-out out)]
        (is (= true (::mute/first-speech-started? after)))
        (is (= true (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-start? (first sent)))))

    (testing "only bot-speech strategy triggers on second speech when first-speech already ended"
      (let [before      {:mute/strategies speech-strategies
                         ::mute/first-speech-started? true
                         ::mute/first-speech-ended? true}
            [after out] (mute/transform before :in start-frame)
            sent        (:sys-out out)]
        (is (= true (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-start? (first sent)))))

    (testing "all strategies can work together"
      (let [before        {:mute/strategies (conj speech-strategies :mute.strategy/tool-call)}
            request-frame (frame/llm-tool-call-request
                            {:role :assistant
                             :tool_calls [{:id "test"
                                           :type :function
                                           :function {:name "test-fn" :arguments "{}"}}]})
            [after out]   (mute/transform before :in request-frame)
            sent          (:sys-out out)]
        (is (= true (::mute/muted? after)))
        (is (= 1 (count sent)))
        (is (frame/mute-input-start? (first sent)))))))
180+
181+
(deftest transform-no-strategies-test
  (testing "no mute frames when no strategies enabled"
    (let [before  {:mute/strategies #{}}
          request {:role :assistant
                   :tool_calls [{:id "test"
                                 :type :function
                                 :function {:name "test-fn" :arguments "{}"}}]}
          ;; one frame of each kind that `transform` reacts to
          frames  [(frame/bot-speech-start true)
                   (frame/bot-speech-stop true)
                   (frame/llm-tool-call-request request)
                   (frame/llm-tool-call-result
                     {:request request
                      :result {:role :tool
                               :content "Tool executed successfully"
                               :tool_call_id "test"}})]]
      (doseq [incoming frames
              :let [[after out] (mute/transform before :in incoming)]]
        (is (= before after))
        (is (empty? out))))))
202+
203+
(deftest transform-invalid-frames-test
  (testing "returns unchanged state for unhandled frames"
    (let [before {:mute/strategies #{:mute.strategy/bot-speech
                                     :mute.strategy/tool-call
                                     :mute.strategy/first-speech}}
          ;; frames that none of the strategies react to
          frames [(frame/transcription "hello")
                  (frame/user-speech-start true)
                  (frame/user-speech-stop true)
                  (frame/llm-text-chunk "some text")]]
      (doseq [incoming frames
              :let [[after out] (mute/transform before :in incoming)]]
        (is (= before after))
        (is (empty? out))))))
215+
216+
(deftest init-test
  (testing "init parses valid config"
    (let [wanted #{:mute.strategy/bot-speech :mute.strategy/tool-call}
          parsed (mute/init {:mute/strategies wanted})]
      (is (= #{:mute.strategy/bot-speech :mute.strategy/tool-call} (:mute/strategies parsed)))))

  (testing "init with single strategy"
    (let [parsed (mute/init {:mute/strategies #{:mute.strategy/first-speech}})]
      (is (= #{:mute.strategy/first-speech} (:mute/strategies parsed)))))

  (testing "init with all strategies"
    (let [parsed-strategies (:mute/strategies
                              (mute/init {:mute/strategies #{:mute.strategy/first-speech
                                                             :mute.strategy/bot-speech
                                                             :mute.strategy/tool-call}}))]
      (is (= 3 (count parsed-strategies)))
      (is (contains? parsed-strategies :mute.strategy/first-speech))
      (is (contains? parsed-strategies :mute.strategy/bot-speech))
      (is (contains? parsed-strategies :mute.strategy/tool-call))))

  ;; both a missing key and an unknown strategy must be rejected
  (testing "init fails with invalid config"
    (is (thrown? Exception (mute/init {})))
    (is (thrown? Exception (mute/init {:mute/strategies #{:invalid-strategy}})))))
240+
241+
(deftest processor-fn-test
  (testing "0-arity returns description"
    (let [description (mute/processor-fn)]
      (is (contains? description :ins))
      (is (contains? description :outs))
      (is (contains? description :params))
      (is (= "Channel for normal frames" (get-in description [:ins :in])))
      (is (= "Channel for system frames" (get-in description [:ins :sys-in])))
      (is (= "Channel for mute-start/stop frames" (get-in description [:outs :sys-out])))))

  (testing "1-arity calls init"
    (let [initial (mute/processor-fn {:mute/strategies #{:mute.strategy/bot-speech}})]
      (is (= #{:mute.strategy/bot-speech} (:mute/strategies initial)))))

  (testing "2-arity returns state unchanged"
    (let [before {:mute/strategies #{:mute.strategy/tool-call}}]
      (is (= before (mute/processor-fn before :some-transition)))))

  (testing "3-arity calls transform"
    (let [before      {:mute/strategies #{:mute.strategy/bot-speech}}
          [after out] (mute/processor-fn before :in (frame/bot-speech-start true))
          sent        (:sys-out out)]
      (is (= true (::mute/muted? after)))
      (is (= 1 (count sent)))
      (is (frame/mute-input-start? (first sent))))))

0 commit comments

Comments
 (0)