|
33 | 33 |
|
34 | 34 | (def wrap-exception |
35 | 35 | (exception/create-exception-middleware |
36 | | - (merge |
37 | | - exception/default-handlers |
38 | | - {;; print stack-traces for all exceptions |
39 | | - ::exception/wrap (fn [handler e request] |
40 | | - (t/log! :error e) |
41 | | - (handler e request))}))) |
| 36 | + (merge |
| 37 | + exception/default-handlers |
| 38 | + {;; print stack-traces for all exceptions |
| 39 | + ::exception/wrap (fn [handler e request] |
| 40 | + (t/log! :error e) |
| 41 | + (handler e request))}))) |
42 | 42 |
|
43 | 43 | (defn emit-xml-str |
44 | 44 | "Emit the string for this xml declaration" |
|
67 | 67 | ws-url (str "wss://" h "/ws")] |
68 | 68 | ;; https://www.twilio.com/docs/voice/twiml/connect |
69 | 69 | (xml-response |
70 | | - (emit-xml-str [:Response |
71 | | - [:Connect |
72 | | - [:Stream {:url ws-url}]]])))) |
| 70 | + (emit-xml-str [:Response |
| 71 | + [:Connect |
| 72 | + [:Stream {:url ws-url}]]])))) |
73 | 73 |
|
74 | 74 | (def dbg-flow (atom nil)) |
75 | 75 |
|
|
93 | 93 | chunk-duration-ms 20] |
94 | 94 | {:procs |
95 | 95 | (u/deep-merge |
96 | | - {:transport-in {:proc transport/twilio-transport-in |
97 | | - :args {:transport/in-ch in}} |
98 | | - :transcriptor {:proc asr/deepgram-processor |
99 | | - :args {:transcription/api-key (secret [:deepgram :api-key]) |
100 | | - :transcription/interim-results? true |
101 | | - :transcription/punctuate? false |
102 | | - :transcription/vad-events? true |
103 | | - :transcription/smart-format? true |
104 | | - :transcription/model :nova-2 |
105 | | - :transcription/utterance-end-ms 1000 |
106 | | - :transcription/language language |
107 | | - :transcription/encoding :ulaw |
108 | | - :transcription/sample-rate sample-rate}} |
109 | | - :context-aggregator {:proc context/context-aggregator |
110 | | - :args {:llm/context llm-context |
111 | | - :aggregator/debug? false}} |
| 96 | + {:transport-in {:proc transport/twilio-transport-in |
| 97 | + :args {:transport/in-ch in}} |
| 98 | + :transcriptor {:proc asr/deepgram-processor |
| 99 | + :args {:transcription/api-key (secret [:deepgram :api-key]) |
| 100 | + :transcription/interim-results? true |
| 101 | + :transcription/punctuate? false |
| 102 | + :transcription/vad-events? true |
| 103 | + :transcription/smart-format? true |
| 104 | + :transcription/model :nova-2 |
| 105 | + :transcription/utterance-end-ms 1000 |
| 106 | + :transcription/language language |
| 107 | + :transcription/encoding :ulaw |
| 108 | + :transcription/sample-rate sample-rate}} |
| 109 | + :context-aggregator {:proc context/context-aggregator |
| 110 | + :args {:llm/context llm-context |
| 111 | + :aggregator/debug? false}} |
112 | 112 |
|
113 | | - :llm {:proc llm/openai-llm-process |
114 | | - :args {:openai/api-key (secret [:openai :new-api-sk]) |
115 | | - :llm/model "gpt-4o-mini"}} |
| 113 | + :llm {:proc llm/openai-llm-process |
| 114 | + :args {:openai/api-key (secret [:openai :new-api-sk]) |
| 115 | + :llm/model "gpt-4o-mini"}} |
116 | 116 |
|
117 | | - :assistant-context-assembler {:proc context/assistant-context-assembler |
118 | | - :args {:debug? false}} |
| 117 | + :assistant-context-assembler {:proc context/assistant-context-assembler |
| 118 | + :args {:debug? false}} |
119 | 119 |
|
120 | | - :llm-sentence-assembler {:proc context/llm-sentence-assembler} |
121 | | - :tts {:proc tts/elevenlabs-tts-process |
122 | | - :args {:elevenlabs/api-key (secret [:elevenlabs :api-key]) |
123 | | - :elevenlabs/model-id "eleven_flash_v2_5" |
124 | | - :elevenlabs/voice-id (secret [:elevenlabs :voice-id]) |
125 | | - :voice/stability 0.5 |
126 | | - :voice/similarity-boost 0.8 |
127 | | - :voice/use-speaker-boost? true |
128 | | - :flow/language language |
129 | | - :audio.out/encoding encoding |
130 | | - :audio.out/sample-rate sample-rate}} |
131 | | - :audio-splitter {:proc transport/audio-splitter |
132 | | - :args {:audio.out/sample-rate sample-rate |
133 | | - :audio.out/sample-size-bits sample-size-bits |
134 | | - :audio.out/channels channels |
135 | | - :audio.out/duration-ms chunk-duration-ms}} |
136 | | - :realtime-out {:proc transport-out/realtime-out-processor |
137 | | - :args {:audio.out/chan out |
138 | | - :audio.out/sending-interval 20}} |
139 | | - :activity-monitor {:proc activity-monitor/process |
140 | | - :args {::activity-monitor/timeout-ms 5000}} |
| 120 | + :llm-sentence-assembler {:proc context/llm-sentence-assembler} |
| 121 | + :tts {:proc tts/elevenlabs-tts-process |
| 122 | + :args {:elevenlabs/api-key (secret [:elevenlabs :api-key]) |
| 123 | + :elevenlabs/model-id "eleven_flash_v2_5" |
| 124 | + :elevenlabs/voice-id (secret [:elevenlabs :voice-id]) |
| 125 | + :voice/stability 0.5 |
| 126 | + :voice/similarity-boost 0.8 |
| 127 | + :voice/use-speaker-boost? true |
| 128 | + :flow/language language |
| 129 | + :audio.out/encoding encoding |
| 130 | + :audio.out/sample-rate sample-rate}} |
| 131 | + :audio-splitter {:proc transport/audio-splitter |
| 132 | + :args {:audio.out/sample-rate sample-rate |
| 133 | + :audio.out/sample-size-bits sample-size-bits |
| 134 | + :audio.out/channels channels |
| 135 | + :audio.out/duration-ms chunk-duration-ms}} |
| 136 | + :realtime-out {:proc transport-out/realtime-out-processor |
| 137 | + :args {:audio.out/chan out |
| 138 | + :audio.out/sending-interval 20}} |
| 139 | + :activity-monitor {:proc activity-monitor/process |
| 140 | + :args {::activity-monitor/timeout-ms 5000}} |
141 | 141 |
|
142 | | - :prn-sink {:proc (flow/process (fn |
143 | | - ([] {:ins {:in "gimme stuff to print!"}}) |
144 | | - ([_] nil) |
145 | | - ([_ _] nil) |
146 | | - ([_ _ v] (t/log! {:id :prn-sink :data v}))))}} |
147 | | - extra-procs) |
| 142 | + :prn-sink {:proc (flow/process (fn |
| 143 | + ([] {:ins {:in "gimme stuff to print!"}}) |
| 144 | + ([_] nil) |
| 145 | + ([_ _] nil) |
| 146 | + ([_ _ v] (t/log! {:id :prn-sink :data v}))))}} |
| 147 | + extra-procs) |
148 | 148 |
|
149 | 149 | :conns (concat |
150 | | - [[[:transport-in :sys-out] [:transcriptor :sys-in]] |
151 | | - [[:transport-in :out] [:transcriptor :in]] |
| 150 | + [[[:transport-in :sys-out] [:transcriptor :sys-in]] |
| 151 | + [[:transport-in :out] [:transcriptor :in]] |
152 | 152 |
|
153 | | - [[:transcriptor :out] [:context-aggregator :in]] |
154 | | - [[:context-aggregator :out] [:llm :in]] |
| 153 | + [[:transcriptor :out] [:context-aggregator :in]] |
| 154 | + [[:context-aggregator :out] [:llm :in]] |
155 | 155 |
|
156 | | - ;; Aggregate full context |
157 | | - [[:llm :out] [:assistant-context-assembler :in]] |
158 | | - [[:assistant-context-assembler :out] [:context-aggregator :in]] |
| 156 | + ;; Aggregate full context |
| 157 | + [[:llm :out] [:assistant-context-assembler :in]] |
| 158 | + [[:assistant-context-assembler :out] [:context-aggregator :in]] |
159 | 159 |
|
160 | | - ;; Assemble sentence by sentence for fast speech |
161 | | - [[:llm :out] [:llm-sentence-assembler :in]] |
162 | | - [[:llm-sentence-assembler :out] [:tts :in]] |
| 160 | + ;; Assemble sentence by sentence for fast speech |
| 161 | + [[:llm :out] [:llm-sentence-assembler :in]] |
| 162 | + [[:llm-sentence-assembler :out] [:tts :in]] |
163 | 163 |
|
164 | | - [[:tts :out] [:audio-splitter :in]] |
165 | | - [[:transport-in :sys-out] [:realtime-out :sys-in]] |
166 | | - [[:audio-splitter :out] [:realtime-out :in]] |
| 164 | + [[:tts :out] [:audio-splitter :in]] |
| 165 | + [[:transport-in :sys-out] [:realtime-out :sys-in]] |
| 166 | + [[:audio-splitter :out] [:realtime-out :in]] |
167 | 167 |
|
168 | | - ;; Activity monitor connections - basically check if there is |
169 | | - ;; activity on the pipeline |
170 | | - [[:realtime-out :out] [:activity-monitor :in]] |
171 | | - [[:transcriptor :out] [:activity-monitor :in]] |
172 | | - [[:activity-monitor :out] [:context-aggregator :in]] |
173 | | - [[:activity-monitor :out] [:tts :in]]] |
174 | | - extra-conns)})) |
| 168 | + ;; Activity monitor connections - basically check if there is |
| 169 | + ;; activity on the pipeline |
| 170 | + [[:realtime-out :out] [:activity-monitor :in]] |
| 171 | + [[:transcriptor :out] [:activity-monitor :in]] |
| 172 | + [[:activity-monitor :out] [:context-aggregator :in]] |
| 173 | + [[:activity-monitor :out] [:tts :in]]] |
| 174 | + extra-conns)})) |
175 | 175 |
|
176 | 176 | (defn tool-use-example |
177 | 177 | "Tools are specified in the :llm/context :tools vector. |
|
184 | 184 | [in out] |
185 | 185 | {:flow |
186 | 186 | (flow/create-flow |
187 | | - (phone-flow |
188 | | - {:in in |
189 | | - :out out |
190 | | - :llm/context {:messages |
191 | | - [{:role "system" |
192 | | - :content "You are a voice agent operating via phone. Be |
| 187 | + (phone-flow |
| 188 | + {:in in |
| 189 | + :out out |
| 190 | + :llm/context {:messages |
| 191 | + [{:role "system" |
| 192 | + :content "You are a voice agent operating via phone. Be |
193 | 193 | concise. The input you receive comes from a |
194 | 194 | speech-to-text (transcription) system that isn't always |
195 | 195 | efficient and may send unclear text. Ask for |
196 | 196 | clarification when you're unsure what the person said."}] |
197 | | - :tools |
198 | | - [{:type :function |
199 | | - :function |
200 | | - {:name "get_weather" |
201 | | - :handler (fn [{:keys [town]}] (str "The weather in " town " is 17 degrees celsius")) |
202 | | - :description "Get the current weather of a location" |
203 | | - :parameters {:type :object |
204 | | - :required [:town] |
205 | | - :properties {:town {:type :string |
206 | | - :description "Town for which to retrieve the current weather"}} |
207 | | - :additionalProperties false} |
208 | | - :strict true}}]}}))}) |
| 197 | + :tools |
| 198 | + [{:type :function |
| 199 | + :function |
| 200 | + {:name "get_weather" |
| 201 | + :handler (fn [{:keys [town]}] (str "The weather in " town " is 17 degrees celsius")) |
| 202 | + :description "Get the current weather of a location" |
| 203 | + :parameters {:type :object |
| 204 | + :required [:town] |
| 205 | + :properties {:town {:type :string |
| 206 | + :description "Town for which to retrieve the current weather"}} |
| 207 | + :additionalProperties false} |
| 208 | + :strict true}}]}}))}) |
209 | 209 |
|
210 | 210 | (defn make-twilio-ws-handler |
211 | 211 | [make-flow] |
|
259 | 259 |
|
260 | 260 | (def app |
261 | 261 | (ring/ring-handler |
262 | | - (ring/router |
263 | | - routes |
264 | | - {:exception pretty/exception |
265 | | - :data {:muuntaja mtj/instance |
266 | | - :middleware [;; query-params & form-params |
267 | | - parameters/parameters-middleware |
268 | | - ;; content-negotiation |
269 | | - muuntaja/format-negotiate-middleware |
270 | | - ;; encoding response body |
271 | | - muuntaja/format-response-middleware |
272 | | - ;; exception handling |
273 | | - wrap-exception |
274 | | - ;; decoding request body |
275 | | - muuntaja/format-request-middleware |
276 | | - ;; coercing response bodys |
277 | | - coercion/coerce-response-middleware |
278 | | - ;; coercing request parameters |
279 | | - coercion/coerce-request-middleware]}}) |
280 | | - (ring/create-default-handler))) |
| 262 | + (ring/router |
| 263 | + routes |
| 264 | + {:exception pretty/exception |
| 265 | + :data {:muuntaja mtj/instance |
| 266 | + :middleware [;; query-params & form-params |
| 267 | + parameters/parameters-middleware |
| 268 | + ;; content-negotiation |
| 269 | + muuntaja/format-negotiate-middleware |
| 270 | + ;; encoding response body |
| 271 | + muuntaja/format-response-middleware |
| 272 | + ;; exception handling |
| 273 | + wrap-exception |
| 274 | + ;; decoding request body |
| 275 | + muuntaja/format-request-middleware |
| 276 | + ;; coercing response bodys |
| 277 | + coercion/coerce-response-middleware |
| 278 | + ;; coercing request parameters |
| 279 | + coercion/coerce-request-middleware]}}) |
| 280 | + (ring/create-default-handler))) |
281 | 281 |
|
282 | 282 | (defn start [& {:keys [port] :or {port 3000}}] |
283 | 283 | (println (str "server running in port " port)) |
|
0 commit comments