third try: replace per-client worker threads with a single polling processor thread

lexasub · lexasub · commit 6d0064847c82 · 2025-04-16T09:55:02.000+04:00
diff --git a/examples/websocket-stream/stream.cpp b/examples/websocket-stream/stream.cpp
@@ -1,199 +1,133 @@
-#include "common.h"
-#include "common-whisper.h"
 #include "whisper.h"
 #include "ixwebsocket/IXWebSocketServer.h"
-#include "ixwebsocket/IXNetSystem.h"
-
 #include <atomic>
-#include <chrono>
-#include <condition_variable>
-#include <cstdio>
-#include <fstream>
 #include <mutex>
-#include <string>
-#include <thread>
-#include <unordered_map>
-#include <vector>
+#include <queue>
+
+std::mutex g_ctx_mtx;              // held by processQueues() around whisper_full() and result reads
+whisper_context* g_ctx = nullptr;  // created in WhisperServer's constructor, freed in its destructor
+constexpr int CHUNK_SIZE = 3 * WHISPER_SAMPLE_RATE;  // samples per transcription call: 3 s of audio
 
 struct ClientSession {
     std::vector<float> pcm_buffer;
     std::mutex mtx;
-    std::condition_variable cv;
-    std::atomic<bool> active{false};
-    std::atomic<bool> terminate{false};
-};
-
-struct server_params {
-    int32_t port = 9002;
-    int32_t n_threads = std::min(4, (int32_t)std::thread::hardware_concurrency());
-    std::string model = "models/ggml-base.en.bin";
-    bool use_gpu = true;
+    std::atomic<bool> active{true};
 };
 
 class WhisperServer {
 private:
-    server_params params;
     ix::WebSocketServer server;
     std::unordered_map<std::string, std::unique_ptr<ClientSession>> clients;
     std::mutex clients_mtx;
-    whisper_context* ctx = nullptr;
+    std::thread processor_thread;
+    std::atomic<bool> running{true};
 
 public:
-    WhisperServer(const server_params& params) : params(params), server(params.port, "0.0.0.0") {
-        ix::initNetSystem();
-        
+    WhisperServer(int port, const std::string& model_path) : server(port, "0.0.0.0") {
         whisper_context_params cparams = whisper_context_default_params();
-        cparams.use_gpu = params.use_gpu;
-        ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
+        cparams.use_gpu = true;  // takes effect only because cparams is handed to the loader below
+        g_ctx = whisper_init_from_file_with_params(model_path.c_str(), cparams);
         
         server.setTLSOptions({});
-	server.setOnClientMessageCallback([this](std::shared_ptr<ix::ConnectionState> connectionState, ix::WebSocket& webSocket, const ix::WebSocketMessagePtr& msg) {
-            this->handleMessage(connectionState, webSocket, msg);
+        server.setOnClientMessageCallback([this](auto&&... args) {
+            handleMessage(args...);
         });
+        
+        processor_thread = std::thread([this] { processQueues(); });
     }
 
     ~WhisperServer() {
+        running = false;
         server.stop();
-        ix::uninitNetSystem();
-        if(ctx) whisper_free(ctx);
+        if (processor_thread.joinable()) processor_thread.join();
+        whisper_free(g_ctx);
     }
 
     void run() {
         server.listenAndStart();
-        fprintf(stderr, "Server started on port %d\n", params.port);
-        
-        while(true) {
-            std::this_thread::sleep_for(std::chrono::seconds(1));
-        }
+        while (running) std::this_thread::sleep_for(std::chrono::seconds(1));
     }
 
 private:
-    void handleMessage(std::shared_ptr<ix::ConnectionState> connectionState, ix::WebSocket& webSocket, const ix::WebSocketMessagePtr& msg) {
-        const auto client_id = connectionState->getId();
+    void handleMessage(std::shared_ptr<ix::ConnectionState> state, 
+                      ix::WebSocket& ws, 
+                      const ix::WebSocketMessagePtr& msg) {
+        const std::string client_id = state->getId();
         
-        if(msg->type == ix::WebSocketMessageType::Open) {
-            fprintf(stderr, "New client connected: %s\n", client_id);
+        if (msg->type == ix::WebSocketMessageType::Open) {
             std::lock_guard<std::mutex> lock(clients_mtx);
             clients[client_id] = std::make_unique<ClientSession>();
-            clients[client_id]->active = true;
-            std::thread(&WhisperServer::processClientAudio, this, client_id).detach();
         }
-        else if(msg->type == ix::WebSocketMessageType::Close) {
-            fprintf(stderr, "Client disconnected: %s\n", client_id);
+        else if (msg->type == ix::WebSocketMessageType::Close) {
             std::lock_guard<std::mutex> lock(clients_mtx);
-            if(clients.count(client_id)) {
-                clients[client_id]->terminate = true;
-                clients[client_id]->cv.notify_one();
+            if (clients.count(client_id)) {
+                clients[client_id]->active = false;
                 clients.erase(client_id);
             }
         }
-        else if(msg->type == ix::WebSocketMessageType::Message) {
-            //std::lock_guard<std::mutex> lock(clients_mtx);
+        else if (msg->type == ix::WebSocketMessageType::Message && msg->binary) {
+            std::lock_guard<std::mutex> lock(clients_mtx);
+            if (!clients.count(client_id)) return;
 
-            if(auto it = clients.find(client_id); it != clients.end()) {
-                auto& session = *it->second;
-                std::lock_guard<std::mutex> session_lock(session.mtx);
-                
-	       	if (!msg->binary) {
-                    webSocket.sendText("Error: Expected binary data");
-                    fprintf(stderr, "Client %s sent text data\n", client_id.c_str());
-                    return;
-                }
+            auto& session = *clients[client_id];
+            const auto& data = msg->str;
+            const int16_t* pcm16 = reinterpret_cast<const int16_t*>(data.data());
+            size_t n_samples = data.size() / sizeof(int16_t);
 
-                const auto &data = msg->str;
-                size_t data_size = data.size();
-                
-                if (data_size % sizeof(int16_t) != 0) {
-                    webSocket.sendText("Error: Invalid data size");
-                    fprintf(stderr, "Invalid data size from %s: %zu\n", client_id.c_str(), data_size);
-                    return;
-                }	
-                //PCM16 -> FLOAT32
-                const int16_t* pcm16 = reinterpret_cast<const int16_t*>(data.data());
-                const size_t num_samples =  data_size / sizeof(int16_t);
-                
-                session.pcm_buffer.reserve(session.pcm_buffer.size() + num_samples);
-                for(size_t i = 0; i < num_samples; ++i) {
-                    session.pcm_buffer.push_back(pcm16[i] / 32768.0f);
-                }
-                
-                session.cv.notify_one();
+            std::lock_guard<std::mutex> session_lock(session.mtx);
+            for (size_t i = 0; i < n_samples; i++) {
+                session.pcm_buffer.push_back(pcm16[i] / 32768.0f);
             }
         }
     }
 
-    void processClientAudio(std::string client_id) {
-        constexpr int step_ms = 300;
-        constexpr int n_samples_step = (1e-3 * step_ms) * WHISPER_SAMPLE_RATE;
-        
-        fprintf(stderr, "Started thread for: %s\n", client_id);
-        while(true) {
-            std::vector<float> audio_chunk;
-            {
-
-        	//fprintf(stderr, "Started read chunk from: %s\n", client_id);
-                std::unique_lock<std::mutex> lock(clients_mtx);
-                if(!clients.count(client_id)) break;
-                auto& session = *clients[client_id];
-                
-                std::unique_lock<std::mutex> session_lock(session.mtx);
-                session.cv.wait_for(session_lock, std::chrono::milliseconds(100), [&session] {
-                    return session.pcm_buffer.size() >= n_samples_step || session.terminate;
-                });
-                
-                if(session.terminate) break;
-                
-                if(session.pcm_buffer.size() >= n_samples_step) {
-                    audio_chunk.assign(session.pcm_buffer.begin(), session.pcm_buffer.begin() + n_samples_step);
-                    session.pcm_buffer.erase(session.pcm_buffer.begin(), session.pcm_buffer.begin() + n_samples_step);
-                }
-
-		size_t available = session.pcm_buffer.size();
-                if(available >= n_samples_step) {
-                    size_t take = std::min(available, (size_t)n_samples_step);
-                    audio_chunk.assign(
-                        session.pcm_buffer.begin(),
-                        session.pcm_buffer.begin() + take
-                    );
-                    session.pcm_buffer.erase(
-                        session.pcm_buffer.begin(),
-                        session.pcm_buffer.begin() + take
-                    );
-                }
-        	//fprintf(stderr, "End of read chunk: %s\n", client_id);
-            }
+    void processQueues() {
+        while (running) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
             
-            if(!audio_chunk.empty()) {
-
-        	//fprintf(stderr, "Good, chunk not empty for: %s\n", client_id);
-                whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);
-                wparams.n_threads = params.n_threads;
-		wparams.language = "en";                    
-		wparams.print_realtime   = false;
-		wparams.print_progress   = false;
-		wparams.single_segment   = true;            
-		wparams.max_tokens       = 32;              
-                
-                if(whisper_full(ctx, wparams, audio_chunk.data(), audio_chunk.size()) == 0) {
-
-        	    fprintf(stderr, "whisper_full == 0: %s\n", client_id);
-                    const int n_segments = whisper_full_n_segments(ctx);
-                    for(int i = 0; i < n_segments; ++i) {
-                        const char* text = whisper_full_get_segment_text(ctx, i);
-                        fprintf(stdout, "[Client %s] %s\n", client_id, text);
+            if (g_ctx == nullptr) continue;  // model failed to load: nothing can be transcribed
+            // Copy out ready chunks under the locks, then transcribe with both locks released,
+            // so long-running inference does not block the websocket callbacks on clients_mtx.
+            std::unordered_map<std::string, std::vector<float>> jobs;  // client id -> one chunk
+            {
+                std::lock_guard<std::mutex> lock(clients_mtx);
+                for (auto& [id, session] : clients) {
+                    std::lock_guard<std::mutex> session_lock(session->mtx);
+                    auto& pcm = session->pcm_buffer;
+                    if (pcm.size() < CHUNK_SIZE) continue;
+                    jobs.emplace(id, std::vector<float>(pcm.begin(), pcm.begin() + CHUNK_SIZE));
+                    pcm.erase(pcm.begin(), pcm.begin() + CHUNK_SIZE);
+                }
+            }
+            for (const auto& [id, chunk] : jobs) {
+                {
+                    std::lock_guard<std::mutex> ctx_lock(g_ctx_mtx);
+                    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+                    wparams.print_progress = false;
+                    wparams.print_realtime = false;
+                    wparams.single_segment = true;
+                    // A successful run may still produce no segments; only read segment 0 if it exists.
+                    if (whisper_full(g_ctx, wparams, chunk.data(), chunk.size()) == 0 && whisper_full_n_segments(g_ctx) > 0) {
+                        printf("[%s] %s\n", id.c_str(), whisper_full_get_segment_text(g_ctx, 0));
                     }
+                    whisper_reset_timings(g_ctx);
                 }
             }
         }
     }
 };
 
 int main(int argc, char** argv) {
-    server_params params;
-    params.port = 9002;
-    params.model = "ggml-large-v3-turbo.bin";
+    if (argc < 3) {
+        // No <port> <model_path> on the command line: serve with the built-in defaults.
+        constexpr int default_port = 9002;
+        WhisperServer server(default_port, "ggml-large-v3-turbo.bin");
+        server.run();
+        return 0;
+    }
     
-    WhisperServer server(params);
+    WhisperServer server(atoi(argv[1]), argv[2]);
     server.run();
     return 0;
-}
+}                    
+