@@ -80,6 +80,8 @@ impl WsSetting {
8080pub struct ConnectQueryParams {
8181 #[ serde( default ) ]
8282 pub reconnect : bool ,
83+ #[ serde( default ) ]
84+ pub opus : bool ,
8385}
8486
8587pub async fn ws_handler (
@@ -929,22 +931,35 @@ async fn process_socket_io(
929931 rx : & mut WsRx ,
930932 audio_tx : ClientTx ,
931933 socket : & mut WebSocket ,
934+ enable_opus : bool ,
932935) -> anyhow:: Result < ( ) > {
936+ let mut opus_encoder =
937+ opus:: Encoder :: new ( SAMPLE_RATE , opus:: Channels :: Mono , opus:: Application :: Voip )
938+ . map_err ( |e| anyhow:: anyhow!( "opus encoder error: {e}" ) ) ?;
939+ let mut ret_audio = Vec :: new ( ) ;
940+
933941 loop {
934942 let r = tokio:: select! {
935943 cmd = rx. recv( ) => {
936944 cmd. map( |cmd| WsEvent :: Command ( cmd) )
937945 }
938946 message = socket. recv( ) => {
939- message. map( |message| match message{
947+ message. map( |message| match message {
940948 Ok ( message) => WsEvent :: Message ( Ok ( message) ) ,
941949 Err ( e) => WsEvent :: Message ( Err ( anyhow:: anyhow!( "recv ws error: {e}" ) ) ) ,
942950 } )
943951 }
944952 } ;
945953
946954 match r {
947- Some ( WsEvent :: Command ( cmd) ) => process_command ( socket, cmd) . await ?,
955+ Some ( WsEvent :: Command ( cmd) ) => {
956+ if enable_opus {
957+ process_command_with_opus ( socket, cmd, & mut opus_encoder, & mut ret_audio)
958+ . await ?
959+ } else {
960+ process_command ( socket, cmd) . await ?
961+ }
962+ }
948963 Some ( WsEvent :: Message ( Ok ( msg) ) ) => match process_message ( msg) {
949964 ProcessMessageResult :: Audio ( d) => audio_tx
950965 . send ( ClientMsg :: AudioChunk ( d) )
@@ -1213,14 +1228,20 @@ async fn handle_socket(
12131228 }
12141229
12151230 log:: info!( "`{}` starting socket io processing" , id) ;
1216- process_socket_io ( & mut cmd_rx, audio_tx, & mut socket) . await ?;
1231+ process_socket_io ( & mut cmd_rx, audio_tx, & mut socket, connect_params . opus ) . await ?;
12171232
12181233 Ok ( ( ) )
12191234}
12201235
/// Audio sample rate, in samples per second.
pub const SAMPLE_RATE: u32 = 16_000;

/// Buffer size derived from [`SAMPLE_RATE`]: two tenths of the per-second
/// sample count (3200 at 16 kHz).
// NOTE(review): presumably the byte size of 100 ms of 16-bit mono audio — confirm against callers.
pub const SAMPLE_RATE_BUFFER_SIZE: usize = (SAMPLE_RATE as usize) * 2 / 10;

/// Returns how many samples make up 120 ms of audio at `sample_rate` Hz.
///
/// Integer arithmetic is used, so the result is rounded down.
pub const fn sample_120ms(sample_rate: u32) -> usize {
    // 120 ms is 12/100 of one second.
    const NUMERATOR: usize = 12;
    const DENOMINATOR: usize = 100;

    let samples_per_second = sample_rate as usize;
    samples_per_second * NUMERATOR / DENOMINATOR
}

/// Number of samples in 120 ms of audio at [`SAMPLE_RATE`].
pub const SAMPLE_RATE_120MS: usize = sample_120ms(SAMPLE_RATE);

1244+
12241245async fn process_command ( ws : & mut WebSocket , cmd : WsCommand ) -> anyhow:: Result < ( ) > {
12251246 match cmd {
12261247 WsCommand :: AsrResult ( texts) => {
@@ -1272,6 +1293,86 @@ async fn process_command(ws: &mut WebSocket, cmd: WsCommand) -> anyhow::Result<(
12721293 Ok ( ( ) )
12731294}
12741295
1296+ async fn process_command_with_opus (
1297+ ws : & mut WebSocket ,
1298+ cmd : WsCommand ,
1299+ opus_encode : & mut opus:: Encoder ,
1300+ ret_audio : & mut Vec < i16 > ,
1301+ ) -> anyhow:: Result < ( ) > {
1302+ match cmd {
1303+ WsCommand :: AsrResult ( texts) => {
1304+ let asr = rmp_serde:: to_vec ( & crate :: protocol:: ServerEvent :: ASR {
1305+ text : texts. join ( "\n " ) ,
1306+ } )
1307+ . expect ( "Failed to serialize ASR ServerEvent" ) ;
1308+ ws. send ( Message :: binary ( asr) ) . await ?;
1309+ }
1310+
1311+ WsCommand :: Action { action } => {
1312+ let action = rmp_serde:: to_vec ( & crate :: protocol:: ServerEvent :: Action { action } )
1313+ . expect ( "Failed to serialize Action ServerEvent" ) ;
1314+ ws. send ( Message :: binary ( action) ) . await ?;
1315+ }
1316+ WsCommand :: StartAudio ( text) => {
1317+ log:: trace!( "StartAudio: {text:?}" ) ;
1318+ opus_encode
1319+ . reset_state ( )
1320+ . map_err ( |e| anyhow:: anyhow!( "opus reset state error: {e}" ) ) ?;
1321+ let start_audio = rmp_serde:: to_vec ( & crate :: protocol:: ServerEvent :: StartAudio { text } )
1322+ . expect ( "Failed to serialize StartAudio ServerEvent" ) ;
1323+ ws. send ( Message :: binary ( start_audio) ) . await ?;
1324+ }
1325+ WsCommand :: Audio ( data) => {
1326+ log:: trace!( "Audio chunk size: {}" , data. len( ) ) ;
1327+ for chunk in data. chunks_exact ( 2 ) {
1328+ let sample = i16:: from_le_bytes ( [ chunk[ 0 ] , chunk[ 1 ] ] ) ;
1329+ ret_audio. push ( sample) ;
1330+ }
1331+
1332+ // 120ms per chunk
1333+ for chunk in ret_audio. chunks ( sample_120ms ( SAMPLE_RATE ) ) {
1334+ if chunk. len ( ) < sample_120ms ( SAMPLE_RATE ) {
1335+ * ret_audio = chunk. to_vec ( ) ;
1336+ break ;
1337+ }
1338+ let data = opus_encode. encode_vec ( chunk, 2 * sample_120ms ( SAMPLE_RATE ) / 3 ) ?;
1339+
1340+ let audio_chunk =
1341+ rmp_serde:: to_vec ( & crate :: protocol:: ServerEvent :: AudioChunk { data } )
1342+ . expect ( "Failed to serialize AudioChunk ServerEvent" ) ;
1343+ ws. send ( Message :: binary ( audio_chunk) ) . await ?;
1344+ }
1345+ }
1346+ WsCommand :: EndAudio => {
1347+ log:: trace!( "EndAudio" ) ;
1348+ if !ret_audio. is_empty ( ) {
1349+ let padded_audio_len = sample_120ms ( SAMPLE_RATE ) - ret_audio. len ( ) ;
1350+ ret_audio. extend ( vec ! [ 0i16 ; padded_audio_len] ) ;
1351+ let data = opus_encode. encode_vec ( & ret_audio, 2 * sample_120ms ( SAMPLE_RATE ) / 3 ) ?;
1352+ let audio_chunk =
1353+ rmp_serde:: to_vec ( & crate :: protocol:: ServerEvent :: AudioChunk { data } )
1354+ . expect ( "Failed to serialize AudioChunk ServerEvent" ) ;
1355+ log:: info!( "Sending final audio chunk of size: {}" , audio_chunk. len( ) ) ;
1356+ ws. send ( Message :: binary ( audio_chunk) ) . await ?;
1357+ ret_audio. clear ( ) ;
1358+ }
1359+ let end_audio = rmp_serde:: to_vec ( & crate :: protocol:: ServerEvent :: EndAudio )
1360+ . expect ( "Failed to serialize EndAudio ServerEvent" ) ;
1361+ ws. send ( Message :: binary ( end_audio) ) . await ?;
1362+ }
1363+ WsCommand :: Video ( _) => {
1364+ log:: warn!( "video command is not implemented yet" ) ;
1365+ }
1366+ WsCommand :: EndResponse => {
1367+ log:: debug!( "EndResponse" ) ;
1368+ let end_response = rmp_serde:: to_vec ( & crate :: protocol:: ServerEvent :: EndResponse )
1369+ . expect ( "Failed to serialize JsonCommand" ) ;
1370+ ws. send ( Message :: binary ( end_response) ) . await ?;
1371+ }
1372+ }
1373+ Ok ( ( ) )
1374+ }
1375+
12751376enum ProcessMessageResult {
12761377 Audio ( Bytes ) ,
12771378 Submit ,
0 commit comments