40 changes: 40 additions & 0 deletions python-api-examples/online-websocket-server.py
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
# Copyright 2025 Uniphore

'''
Real-time speech recognition server using WebSockets.
Python API for starting the server: a thin wrapper around the C++
implementation in online-websocket-server.cc.

(1) Download a streaming transducer model

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2
rm sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2

(2) Start the websocket server with the downloaded model

python3 ./python-api-examples/online-websocket-server.py \
--tokens=./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \
--encoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \
--decoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-128.onnx \
--joiner=./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-128.onnx \
--max-batch-size=5 \
--loop-interval-ms=10

'''
import signal
import sys

from sherpa_onnx import OnlineWebSocketServer

def signal_handler(sig, frame):
    print('Exiting...')
    sys.exit(0)

# Bind SIGINT to signal_handler
signal.signal(signal.SIGINT, signal_handler)

if __name__ == "__main__":
    args = sys.argv[:]
    OnlineWebSocketServer(server_args=args)
3 changes: 3 additions & 0 deletions sherpa-onnx/csrc/CMakeLists.txt
@@ -87,6 +87,9 @@ set(sources
online-transducer-model.cc
online-transducer-modified-beam-search-decoder.cc
online-transducer-nemo-model.cc
online-transducer-greedy-search-nemo-decoder.cc
online-websocket-server.cc
online-websocket-server-impl.cc
online-wenet-ctc-model-config.cc
online-wenet-ctc-model.cc
online-zipformer-transducer-model.cc
154 changes: 98 additions & 56 deletions sherpa-onnx/csrc/online-websocket-server.cc
@@ -1,11 +1,12 @@
// sherpa-onnx/csrc/online-websocket-server.cc
//
// Copyright (c) 2022-2023 Xiaomi Corporation
// Copyright (c) 2025 Uniphore (Author: Manickavela A)

#include "asio.hpp"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-websocket-server-impl.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include <string>
#include <csignal>

#include "sherpa-onnx/csrc/online-websocket-server.h"

static constexpr const char *kUsageMessage = R"(
Automatic speech recognition with sherpa-onnx using websocket.
@@ -30,80 +31,121 @@ Please refer to
for a list of pre-trained models to download.
)";

int32_t main(int32_t argc, char *argv[]) {
sherpa_onnx::ParseOptions po(kUsageMessage);

// Global server instance pointer for signal handling
OnlineWebsocketServerApp *global_server_instance = nullptr;

// Signal handler to stop the server
void SignalHandler(int signal) {
if (signal == SIGINT || signal == SIGTERM) {
SHERPA_ONNX_LOGE("\nSignal %d received. Stopping server...", signal);
if (global_server_instance) {
global_server_instance->Stop();
}
}
}

OnlineWebsocketServerApp::OnlineWebsocketServerApp(
int32_t argc, char *argv[]) : argc_(argc), argv_(argv) {}

void OnlineWebsocketServerApp::Run() {
sherpa_onnx::ParseOptions po(kUsageMessage);
sherpa_onnx::OnlineWebsocketServerConfig config;

// the server will listen on this port
int32_t port = 6006;
// the server will listen on this port
int32_t port = 6006;

// size of the thread pool for handling network connections
int32_t num_io_threads = 1;
// size of the thread pool for handling network connections
int32_t num_io_threads = 1;

// size of the thread pool for neural network computation and decoding
int32_t num_work_threads = 3;
// size of the thread pool for neural network computation and decoding
int32_t num_work_threads = 3;

po.Register("num-io-threads", &num_io_threads,
"Thread pool size for network connections.");
po.Register("num-io-threads", &num_io_threads,
"Thread pool size for network connections.");

po.Register("num-work-threads", &num_work_threads,
"Thread pool size for for neural network "
"computation and decoding.");
po.Register("num-work-threads", &num_work_threads,
"Thread pool size for for neural network "
"computation and decoding.");

po.Register("port", &port, "The port on which the server will listen.");
po.Register("port", &port, "The port on which the server will listen.");

config.Register(&po);
config.Register(&po);

if (argc == 1) {
po.PrintUsage();
exit(EXIT_FAILURE);
}
if (argc_ == 1) {
po.PrintUsage();
exit(EXIT_FAILURE);
}

po.Read(argc, argv);
po.Read(argc_, argv_);

if (po.NumArgs() != 0) {
SHERPA_ONNX_LOGE("Unrecognized positional arguments!");
po.PrintUsage();
exit(EXIT_FAILURE);
}
if (po.NumArgs() != 0) {
SHERPA_ONNX_LOGE("Unrecognized positional arguments!");
po.PrintUsage();
exit(EXIT_FAILURE);
}

config.Validate();
config.Validate();

asio::io_context io_conn; // for network connections
asio::io_context io_work; // for neural network and decoding
// Set the global instance for signal handling
global_server_instance = this;

sherpa_onnx::OnlineWebsocketServer server(io_conn, io_work, config);
server.Run(port);
// Handle SIGINT and SIGTERM
std::signal(SIGINT, SignalHandler);
std::signal(SIGTERM, SignalHandler);

SHERPA_ONNX_LOGE("Started!");
SHERPA_ONNX_LOGE("Listening on: %d", port);
SHERPA_ONNX_LOGE("Number of work threads: %d", num_work_threads);
// io_conn for network connections
// io_work for neural network and decoding

// give some work to do for the io_work pool
auto work_guard = asio::make_work_guard(io_work);
sherpa_onnx::OnlineWebsocketServer server(io_conn_, io_work_, config);
server.Run(port);

std::vector<std::thread> io_threads;
SHERPA_ONNX_LOGE("Started!");
SHERPA_ONNX_LOGE("Listening on: %d", port);
SHERPA_ONNX_LOGE("Number of work threads: %d", num_work_threads);

// decrement since the main thread is also used for network communications
for (int32_t i = 0; i < num_io_threads - 1; ++i) {
io_threads.emplace_back([&io_conn]() { io_conn.run(); });
}
// give some work to do for the io_work pool
auto work_guard = asio::make_work_guard(io_work_);

std::vector<std::thread> work_threads;
for (int32_t i = 0; i < num_work_threads; ++i) {
work_threads.emplace_back([&io_work]() { io_work.run(); });
}
std::vector<std::thread> io_threads;

io_conn.run();
// decrement since the main thread is also used for network communications
for (int32_t i = 0; i < num_io_threads - 1; ++i) {
io_threads.emplace_back([this]() { io_conn_.run(); });
}

for (auto &t : io_threads) {
t.join();
}
std::vector<std::thread> work_threads;
for (int32_t i = 0; i < num_work_threads; ++i) {
work_threads.emplace_back([this]() { io_work_.run(); });
}

for (auto &t : work_threads) {
t.join();
}
// Main thread handles IO
io_conn_.run();

return 0;
for (auto &t : io_threads) {
t.join();
}

for (auto &t : work_threads) {
t.join();
}
SHERPA_ONNX_LOGE("Server shut down gracefully.");
}

void OnlineWebsocketServerApp::Stop() {
shutdown_requested_.store(true);
io_conn_.stop();
io_work_.stop();
SHERPA_ONNX_LOGE("Server stopping...");
}

int32_t main(int32_t argc, char *argv[]) {
OnlineWebsocketServerApp app(argc, argv);
app.Run();
return 0;
}

void StartServer(int32_t argc, char *argv[]) {
OnlineWebsocketServerApp app(argc, argv);
app.Run();
}
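The refactored Run() keeps the original threading layout: io_conn_ is driven by the main thread plus num_io_threads - 1 helper threads for network traffic, while io_work_ is kept alive by a work guard until Stop() stops both contexts. The sketch below is a minimal, standalone illustration of that work-guard pattern; the names and the posted task are illustrative and not taken from this PR.

```cpp
// Minimal sketch of the asio work-guard pattern used in Run(): without the
// guard, io_work.run() would return as soon as its queue is empty, and the
// worker threads would exit before any decoding work is posted.
#include <asio.hpp>

#include <cstdio>
#include <thread>
#include <vector>

int main() {
  asio::io_context io_work;

  // Keep run() from returning while the queue is momentarily empty.
  auto work_guard = asio::make_work_guard(io_work);

  std::vector<std::thread> workers;
  for (int i = 0; i < 3; ++i) {
    workers.emplace_back([&io_work]() { io_work.run(); });
  }

  // A task posted later is picked up by one of the idle workers.
  asio::post(io_work, []() { std::printf("decoding task executed\n"); });

  // Releasing the guard (or calling io_work.stop(), as Stop() does in the
  // server) lets run() return once the pending handlers have finished.
  work_guard.reset();

  for (auto &t : workers) {
    t.join();
  }

  return 0;
}
```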
33 changes: 33 additions & 0 deletions sherpa-onnx/csrc/online-websocket-server.h
@@ -0,0 +1,33 @@
// sherpa-onnx/csrc/online-websocket-server.h
//
// Copyright (c) 2025 Uniphore (Author: Manickavela A)

#ifndef SHERPA_ONNX_ONLINE_WEBSOCKET_SERVER_H
#define SHERPA_ONNX_ONLINE_WEBSOCKET_SERVER_H

#include <asio.hpp>
#include <thread>
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-websocket-server-impl.h"
#include "sherpa-onnx/csrc/parse-options.h"

class OnlineWebsocketServerApp {
public:
OnlineWebsocketServerApp(int32_t argc, char *argv[]);
void Run();
void Stop();

private:
int32_t argc_;
char **argv_;
asio::io_context io_conn_; // ASIO context for connections
asio::io_context io_work_; // ASIO context for work
std::atomic<bool> shutdown_requested_{false};
std::vector<std::thread> io_threads_;
std::vector<std::thread> work_threads_;
};

// Declare StartServer so that it is accessible from the pybind11 wrapper
void StartServer(int32_t argc, char *argv[]);

#endif // SHERPA_ONNX_ONLINE_WEBSOCKET_SERVER_H
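One detail worth noting in the server refactor: std::signal accepts only a plain function pointer, so SignalHandler cannot capture the app object and instead reaches it through the file-scope global_server_instance pointer. Below is a self-contained sketch of that pattern with illustrative names; the handler only flips an atomic flag, which is safe to do from a signal handler as long as the atomic is lock-free.

```cpp
// Sketch of the std::signal + file-scope pointer pattern: the handler cannot
// be a capturing lambda, so it finds the server through a global pointer and
// only records that a stop was requested.
#include <atomic>
#include <csignal>
#include <cstdio>

class Server {
 public:
  void Stop() { stop_requested_.store(true); }
  bool StopRequested() const { return stop_requested_.load(); }

 private:
  std::atomic<bool> stop_requested_{false};
};

static Server *global_server_instance = nullptr;

void SignalHandler(int sig) {
  if ((sig == SIGINT || sig == SIGTERM) && global_server_instance != nullptr) {
    global_server_instance->Stop();
  }
}

int main() {
  Server server;
  global_server_instance = &server;
  std::signal(SIGINT, SignalHandler);
  std::signal(SIGTERM, SignalHandler);

  std::raise(SIGINT);  // simulate Ctrl-C for the sketch
  std::printf("stop requested: %d\n", server.StopRequested());
  return 0;
}
```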
1 change: 1 addition & 0 deletions sherpa-onnx/python/csrc/CMakeLists.txt
@@ -35,6 +35,7 @@ set(srcs
online-transducer-model-config.cc
online-wenet-ctc-model-config.cc
online-zipformer2-ctc-model-config.cc
online-websocket-server-app.cc
provider-config.cc
sherpa-onnx.cc
silero-vad-model-config.cc
40 changes: 40 additions & 0 deletions sherpa-onnx/python/csrc/online-websocket-server-app.cc
@@ -0,0 +1,40 @@
// sherpa-onnx/python/csrc/online-websocket-server-app.cc
//
// Copyright (c) 2025 Uniphore (Author: Manickavela A)

#include "sherpa-onnx/python/csrc/online-websocket-server-app.h"

#include <string>

#include "asio.hpp"
#include "sherpa-onnx/csrc/online-websocket-server.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void StartServerWrapper(py::list args) {
int argc = args.size();
std::vector<std::string> args_str; // Store actual strings
std::vector<char *> argv; // Store pointers to those strings

for (const auto &arg : args) {
args_str.push_back(arg.cast<std::string>());
}

// Fill argv with pointers to the actual string data
for (auto &str : args_str) {
argv.push_back(str.data());
}

argv.push_back(nullptr); // Null-terminate like C-style arrays

  // Start the server
StartServer(argc, argv.data());
}

void PybindOnlineWebsocketServerWrapperApp(py::module *m) {
m->def("start_server", &StartServerWrapper, "Start the WebSocket server",
py::call_guard<py::gil_scoped_release>());
}

} // namespace sherpa_onnx
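The conversion in StartServerWrapper works because args_str owns the character buffers and argv stores only pointers into them, so args_str has to outlive every use of argv. A reduced sketch of that ownership pattern follows; ParseArgs is a hypothetical consumer standing in for StartServer, not a sherpa-onnx function.

```cpp
// Sketch of the string-ownership pattern above: argv holds pointers into
// args_str, so the strings must stay alive for as long as argv is read.
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical consumer standing in for StartServer().
void ParseArgs(int argc, char *argv[]) {
  for (int i = 0; i < argc; ++i) {
    std::printf("arg[%d] = %s\n", i, argv[i]);
  }
}

int main() {
  std::vector<std::string> args_str = {"online-websocket-server",
                                       "--port=6006"};

  std::vector<char *> argv;
  argv.reserve(args_str.size() + 1);
  for (auto &s : args_str) {
    argv.push_back(s.data());  // non-const data() requires C++17
  }
  argv.push_back(nullptr);  // mimic the null-terminated argv convention

  ParseArgs(static_cast<int>(args_str.size()), argv.data());
  return 0;
}
```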
16 changes: 16 additions & 0 deletions sherpa-onnx/python/csrc/online-websocket-server-app.h
@@ -0,0 +1,16 @@
// sherpa-onnx/python/csrc/online-websocket-server-app.h
//
// Copyright (c) 2025 Uniphore (Author: Manickavela A)

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_WEBSOCKET_SERVER_APP_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_WEBSOCKET_SERVER_APP_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"


namespace sherpa_onnx {

void PybindOnlineWebsocketServerWrapperApp(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_WEBSOCKET_SERVER_APP_H_
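Because start_server is registered with py::call_guard<py::gil_scoped_release>, the GIL is released for the whole blocking call, so other Python threads keep running while the server loops inside C++. A minimal pybind11 sketch of that binding pattern is shown below; the module and function names are made up for illustration.

```cpp
// Minimal pybind11 sketch of exporting a long-running call with the GIL
// released for its whole duration.
#include <pybind11/pybind11.h>

#include <chrono>
#include <thread>

namespace py = pybind11;

// Stand-in for a blocking server loop.
void BlockingCall() { std::this_thread::sleep_for(std::chrono::seconds(1)); }

PYBIND11_MODULE(gil_demo, m) {
  m.def("blocking_call", &BlockingCall,
        "Blocks in C++ without holding the GIL",
        py::call_guard<py::gil_scoped_release>());
}
```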
2 changes: 2 additions & 0 deletions sherpa-onnx/python/csrc/sherpa-onnx.cc
@@ -23,6 +23,7 @@
#include "sherpa-onnx/python/csrc/online-punctuation.h"
#include "sherpa-onnx/python/csrc/online-recognizer.h"
#include "sherpa-onnx/python/csrc/online-stream.h"
#include "sherpa-onnx/python/csrc/online-websocket-server-app.h"
#include "sherpa-onnx/python/csrc/speaker-embedding-extractor.h"
#include "sherpa-onnx/python/csrc/speaker-embedding-manager.h"
#include "sherpa-onnx/python/csrc/spoken-language-identification.h"
@@ -56,6 +57,7 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
PybindOnlineModelConfig(&m);
PybindOnlineLMConfig(&m);
PybindOnlineStream(&m);
PybindOnlineWebsocketServerWrapperApp(&m);
PybindEndpoint(&m);
PybindOnlineRecognizer(&m);
PybindKeywordSpotter(&m);
1 change: 1 addition & 0 deletions sherpa-onnx/python/sherpa_onnx/__init__.py
@@ -46,4 +46,5 @@
from .keyword_spotter import KeywordSpotter
from .offline_recognizer import OfflineRecognizer
from .online_recognizer import OnlineRecognizer
from .online_websocket_server import OnlineWebSocketServer
from .utils import text2token